github.com/anchore/syft@v1.38.2/syft/file/cataloger/filedigest/cataloger.go (about)

     1  package filedigest
     2  
     3  import (
     4  	"context"
     5  	"crypto"
     6  	"errors"
     7  	"fmt"
     8  
     9  	"github.com/dustin/go-humanize"
    10  
    11  	"github.com/anchore/go-sync"
    12  	stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
    13  	"github.com/anchore/syft/internal"
    14  	"github.com/anchore/syft/internal/bus"
    15  	intFile "github.com/anchore/syft/internal/file"
    16  	"github.com/anchore/syft/internal/log"
    17  	"github.com/anchore/syft/internal/unknown"
    18  	"github.com/anchore/syft/syft/cataloging"
    19  	"github.com/anchore/syft/syft/event/monitor"
    20  	"github.com/anchore/syft/syft/file"
    21  	intCataloger "github.com/anchore/syft/syft/file/cataloger/internal"
    22  )
    23  
// ErrUndigestableFile is the sentinel error returned by catalogLocation when a
// location does not refer to a regular file. Catalog checks for it with
// errors.Is and skips such locations without reporting an error.
var ErrUndigestableFile = errors.New("undigestable file")
    25  
// Cataloger computes file digests for locations provided by a file.Resolver.
type Cataloger struct {
	// hashes is the set of hash algorithms handed to intFile.NewDigestsFromFile
	// for every file that is digested; it is populated by NewCataloger.
	hashes []crypto.Hash
}
    29  
    30  func NewCataloger(hashes []crypto.Hash) *Cataloger {
    31  	return &Cataloger{
    32  		hashes: intFile.NormalizeHashes(hashes),
    33  	}
    34  }
    35  
    36  func (i *Cataloger) Catalog(ctx context.Context, resolver file.Resolver, coordinates ...file.Coordinates) (map[file.Coordinates][]file.Digest, error) {
    37  	results := make(map[file.Coordinates][]file.Digest)
    38  	var locations []file.Location
    39  
    40  	if len(coordinates) == 0 {
    41  		locations = intCataloger.AllRegularFiles(ctx, resolver)
    42  	} else {
    43  		for _, c := range coordinates {
    44  			locs, err := resolver.FilesByPath(c.RealPath)
    45  			if err != nil {
    46  				return nil, fmt.Errorf("unable to get file locations for path %q: %w", c.RealPath, err)
    47  			}
    48  			locations = append(locations, locs...)
    49  		}
    50  	}
    51  
    52  	prog := catalogingProgress(int64(len(locations)))
    53  
    54  	err := sync.Collect(&ctx, cataloging.ExecutorFile, sync.ToSeq(locations), func(location file.Location) ([]file.Digest, error) {
    55  		result, err := i.catalogLocation(ctx, resolver, location)
    56  
    57  		if errors.Is(err, ErrUndigestableFile) {
    58  			return nil, nil
    59  		}
    60  
    61  		prog.AtomicStage.Set(location.Path())
    62  
    63  		if internal.IsErrPathPermission(err) {
    64  			log.Debugf("file digests cataloger skipping %q: %+v", location.RealPath, err)
    65  			return nil, unknown.New(location, err)
    66  		}
    67  
    68  		if err != nil {
    69  			prog.SetError(err)
    70  			return nil, unknown.New(location, err)
    71  		}
    72  
    73  		prog.Increment()
    74  
    75  		return result, nil
    76  	}, func(location file.Location, digests []file.Digest) {
    77  		if len(digests) > 0 {
    78  			results[location.Coordinates] = digests
    79  		}
    80  	})
    81  
    82  	log.Debugf("file digests cataloger processed %d files", prog.Current())
    83  
    84  	prog.AtomicStage.Set(fmt.Sprintf("%s files", humanize.Comma(prog.Current())))
    85  	prog.SetCompleted()
    86  
    87  	return results, err
    88  }
    89  
    90  func (i *Cataloger) catalogLocation(ctx context.Context, resolver file.Resolver, location file.Location) ([]file.Digest, error) {
    91  	meta, err := resolver.FileMetadataByLocation(location)
    92  	if err != nil {
    93  		return nil, err
    94  	}
    95  
    96  	// we should only attempt to report digests for files that are regular files (don't attempt to resolve links)
    97  	if meta.Type != stereoscopeFile.TypeRegular {
    98  		return nil, ErrUndigestableFile
    99  	}
   100  
   101  	contentReader, err := resolver.FileContentsByLocation(location)
   102  	if err != nil {
   103  		return nil, err
   104  	}
   105  	defer internal.CloseAndLogError(contentReader, location.AccessPath)
   106  
   107  	digests, err := intFile.NewDigestsFromFile(ctx, contentReader, i.hashes)
   108  	if err != nil {
   109  		return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
   110  	}
   111  
   112  	return digests, nil
   113  }
   114  
   115  func catalogingProgress(locations int64) *monitor.TaskProgress {
   116  	info := monitor.GenericTask{
   117  		Title: monitor.Title{
   118  			Default: "File digests",
   119  		},
   120  		ParentID: monitor.TopLevelCatalogingTaskID,
   121  	}
   122  
   123  	return bus.StartCatalogerTask(info, locations, "")
   124  }