github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/file/cataloger/filecontent/cataloger.go (about)

     1  package filecontent
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/base64"
     7  	"fmt"
     8  	"io"
     9  
    10  	"github.com/dustin/go-humanize"
    11  
    12  	"github.com/anchore/syft/internal"
    13  	"github.com/anchore/syft/internal/bus"
    14  	intFile "github.com/anchore/syft/internal/file"
    15  	"github.com/anchore/syft/internal/log"
    16  	"github.com/anchore/syft/syft/event/monitor"
    17  	"github.com/anchore/syft/syft/file"
    18  )
    19  
    20  type Config struct {
    21  	// Globs are the file patterns that must be matched for a file to be considered for cataloging.
    22  	Globs []string `yaml:"globs" json:"globs" mapstructure:"globs"`
    23  
    24  	// SkipFilesAboveSize is the maximum file size (in bytes) to allow to be considered while cataloging. If the file is larger than this size it will be skipped.
    25  	SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"`
    26  }
    27  
    28  type Cataloger struct {
    29  	globs                     []string
    30  	skipFilesAboveSizeInBytes int64
    31  }
    32  
    33  func DefaultConfig() Config {
    34  	return Config{
    35  		SkipFilesAboveSize: 250 * intFile.KB,
    36  	}
    37  }
    38  
    39  func NewCataloger(cfg Config) *Cataloger {
    40  	return &Cataloger{
    41  		globs:                     cfg.Globs,
    42  		skipFilesAboveSizeInBytes: cfg.SkipFilesAboveSize,
    43  	}
    44  }
    45  
    46  func (i *Cataloger) Catalog(_ context.Context, resolver file.Resolver) (map[file.Coordinates]string, error) {
    47  	results := make(map[file.Coordinates]string)
    48  	var locations []file.Location
    49  
    50  	locations, err := resolver.FilesByGlob(i.globs...)
    51  	if err != nil {
    52  		return nil, err
    53  	}
    54  
    55  	prog := catalogingProgress(int64(len(locations)))
    56  
    57  	for _, location := range locations {
    58  		prog.AtomicStage.Set(location.Path())
    59  
    60  		metadata, err := resolver.FileMetadataByLocation(location)
    61  		if err != nil {
    62  			prog.SetError(err)
    63  			return nil, err
    64  		}
    65  
    66  		if i.skipFilesAboveSizeInBytes > 0 && metadata.Size() > i.skipFilesAboveSizeInBytes {
    67  			continue
    68  		}
    69  
    70  		result, err := i.catalogLocation(resolver, location)
    71  		if internal.IsErrPathPermission(err) {
    72  			log.Debugf("file contents cataloger skipping - %+v", err)
    73  			continue
    74  		}
    75  		if err != nil {
    76  			prog.SetError(err)
    77  			return nil, err
    78  		}
    79  
    80  		prog.Increment()
    81  
    82  		results[location.Coordinates] = result
    83  	}
    84  
    85  	log.Debugf("file contents cataloger processed %d files", len(results))
    86  
    87  	prog.AtomicStage.Set(fmt.Sprintf("%s files", humanize.Comma(prog.Current())))
    88  	prog.SetCompleted()
    89  
    90  	return results, nil
    91  }
    92  
    93  func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Location) (string, error) {
    94  	contentReader, err := resolver.FileContentsByLocation(location)
    95  	if err != nil {
    96  		return "", err
    97  	}
    98  	defer internal.CloseAndLogError(contentReader, location.AccessPath)
    99  
   100  	buf := &bytes.Buffer{}
   101  	encoder := base64.NewEncoder(base64.StdEncoding, buf)
   102  	if _, err = io.Copy(encoder, contentReader); err != nil {
   103  		return "", internal.ErrPath{Context: "content-cataloger", Path: location.RealPath, Err: err}
   104  	}
   105  	// note: it's important to close the reader before reading from the buffer since closing will flush the remaining bytes
   106  	if err := encoder.Close(); err != nil {
   107  		return "", fmt.Errorf("unable to close base64 encoder: %w", err)
   108  	}
   109  
   110  	return buf.String(), nil
   111  }
   112  
   113  func catalogingProgress(locations int64) *monitor.CatalogerTaskProgress {
   114  	info := monitor.GenericTask{
   115  		Title: monitor.Title{
   116  			Default: "File contents",
   117  		},
   118  		ParentID: monitor.TopLevelCatalogingTaskID,
   119  	}
   120  
   121  	return bus.StartCatalogerTask(info, locations, "")
   122  }