github.com/anchore/syft@v1.38.2/syft/file/cataloger/filecontent/cataloger.go (about)

     1  package filecontent
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/base64"
     7  	"fmt"
     8  	"io"
     9  
    10  	"github.com/dustin/go-humanize"
    11  
    12  	"github.com/anchore/syft/internal"
    13  	"github.com/anchore/syft/internal/bus"
    14  	intFile "github.com/anchore/syft/internal/file"
    15  	"github.com/anchore/syft/internal/log"
    16  	"github.com/anchore/syft/internal/unknown"
    17  	"github.com/anchore/syft/syft/event/monitor"
    18  	"github.com/anchore/syft/syft/file"
    19  )
    20  
// Config holds the user-facing options for the file contents cataloger.
type Config struct {
	// Globs are the file patterns that must be matched for a file to be considered for cataloging.
	Globs []string `yaml:"globs" json:"globs" mapstructure:"globs"`

	// SkipFilesAboveSize is the maximum file size (in bytes) to allow to be considered while cataloging. If the file is larger than this size it will be skipped.
	// The limit is only enforced when the value is greater than zero.
	SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"`
}
    28  
// Cataloger captures the base64-encoded contents of files selected by a set of glob patterns.
type Cataloger struct {
	globs                     []string // patterns handed to the resolver to select candidate files
	skipFilesAboveSizeInBytes int64    // size limit in bytes; only enforced when greater than zero
}
    33  
    34  func DefaultConfig() Config {
    35  	return Config{
    36  		SkipFilesAboveSize: 250 * intFile.KB,
    37  	}
    38  }
    39  
    40  func NewCataloger(cfg Config) *Cataloger {
    41  	return &Cataloger{
    42  		globs:                     cfg.Globs,
    43  		skipFilesAboveSizeInBytes: cfg.SkipFilesAboveSize,
    44  	}
    45  }
    46  
    47  func (i *Cataloger) Catalog(_ context.Context, resolver file.Resolver) (map[file.Coordinates]string, error) {
    48  	results := make(map[file.Coordinates]string)
    49  	var locations []file.Location
    50  	var errs error
    51  
    52  	locations, err := resolver.FilesByGlob(i.globs...)
    53  	if err != nil {
    54  		return nil, err
    55  	}
    56  
    57  	prog := catalogingProgress(int64(len(locations)))
    58  
    59  	for _, location := range locations {
    60  		prog.AtomicStage.Set(location.Path())
    61  
    62  		metadata, err := resolver.FileMetadataByLocation(location)
    63  		if err != nil {
    64  			errs = unknown.Append(errs, location, err)
    65  			prog.SetError(err)
    66  			continue
    67  		}
    68  
    69  		if i.skipFilesAboveSizeInBytes > 0 && metadata.Size() > i.skipFilesAboveSizeInBytes {
    70  			continue
    71  		}
    72  
    73  		result, err := i.catalogLocation(resolver, location)
    74  		if internal.IsErrPathPermission(err) {
    75  			errs = unknown.Append(errs, location, fmt.Errorf("permission error reading file contents: %w", err))
    76  			continue
    77  		}
    78  		if err != nil {
    79  			errs = unknown.Append(errs, location, err)
    80  			continue
    81  		}
    82  
    83  		prog.Increment()
    84  
    85  		results[location.Coordinates] = result
    86  	}
    87  
    88  	log.Debugf("file contents cataloger processed %d files", len(results))
    89  
    90  	prog.AtomicStage.Set(fmt.Sprintf("%s files", humanize.Comma(prog.Current())))
    91  	prog.SetCompleted()
    92  
    93  	return results, errs
    94  }
    95  
    96  func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Location) (string, error) {
    97  	contentReader, err := resolver.FileContentsByLocation(location)
    98  	if err != nil {
    99  		return "", err
   100  	}
   101  	defer internal.CloseAndLogError(contentReader, location.AccessPath)
   102  
   103  	buf := &bytes.Buffer{}
   104  	encoder := base64.NewEncoder(base64.StdEncoding, buf)
   105  	if _, err = io.Copy(encoder, contentReader); err != nil {
   106  		return "", internal.ErrPath{Context: "content-cataloger", Path: location.RealPath, Err: err}
   107  	}
   108  	// note: it's important to close the reader before reading from the buffer since closing will flush the remaining bytes
   109  	if err := encoder.Close(); err != nil {
   110  		return "", fmt.Errorf("unable to close base64 encoder: %w", err)
   111  	}
   112  
   113  	return buf.String(), nil
   114  }
   115  
   116  func catalogingProgress(locations int64) *monitor.TaskProgress {
   117  	info := monitor.GenericTask{
   118  		Title: monitor.Title{
   119  			Default: "File contents",
   120  		},
   121  		ParentID: monitor.TopLevelCatalogingTaskID,
   122  	}
   123  
   124  	return bus.StartCatalogerTask(info, locations, "")
   125  }