github.com/anchore/syft@v1.38.2/syft/source/filesource/file_source.go (about)

     1  package filesource
     2  
     3  import (
     4  	"context"
     5  	"crypto"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path"
    10  	"path/filepath"
    11  	"sync"
    12  
    13  	"github.com/mholt/archives"
    14  	"github.com/opencontainers/go-digest"
    15  
    16  	stereoFile "github.com/anchore/stereoscope/pkg/file"
    17  	intFile "github.com/anchore/syft/internal/file"
    18  	"github.com/anchore/syft/internal/log"
    19  	"github.com/anchore/syft/syft/artifact"
    20  	"github.com/anchore/syft/syft/file"
    21  	"github.com/anchore/syft/syft/internal/fileresolver"
    22  	"github.com/anchore/syft/syft/source"
    23  	"github.com/anchore/syft/syft/source/directorysource"
    24  	"github.com/anchore/syft/syft/source/internal"
    25  )
    26  
// compile-time assertion that *fileSource satisfies the source.Source interface.
var _ source.Source = (*fileSource)(nil)
    28  
// Config holds the options used by New to construct a file source.
type Config struct {
	// Path is the location of the file to analyze; New opens and stats it and rejects directories.
	Path string
	// Exclude carries the exclusion settings; Exclude.Paths is turned into exclusion functions for the resolver.
	Exclude source.ExcludeConfig
	// DigestAlgorithms lists the hashes to compute over the file; no digests are computed when empty.
	DigestAlgorithms []crypto.Hash
	// Alias optionally overrides the name, version, and supplier reported by Describe. The alias name and
	// version are also mixed into the source ID.
	Alias source.Alias
	// SkipExtractArchive disables unpacking an archive into a temp directory; the path is analyzed as-is.
	SkipExtractArchive bool
}
    36  
// fileSource is the source.Source implementation for a single file (optionally an archive that is unpacked
// to a temp directory for analysis).
type fileSource struct {
	// id is the artifact ID derived from the file contents (and alias, if any).
	id artifact.ID
	// digestForVersion is the content digest reported as the version when no alias version is given.
	digestForVersion string
	// config is the configuration the source was created with.
	config Config
	// resolver is the resolver assigned by FileResolver and cleared by Close.
	resolver file.Resolver
	// mutex is held by FileResolver and Close while they touch resolver and closer.
	mutex *sync.Mutex
	// closer is the cleanup function returned by fileAnalysisPath; Close invokes it.
	closer func() error
	// digests are the digests computed for the algorithms in config.DigestAlgorithms.
	digests []file.Digest
	// mimeType is the MIME type detected for the file when the source was created.
	mimeType string
	// analysisPath is either config.Path or, for an extracted archive, the temp directory holding its contents.
	analysisPath string
}
    48  
    49  func NewFromPath(path string) (source.Source, error) {
    50  	return New(Config{Path: path})
    51  }
    52  
    53  func New(cfg Config) (source.Source, error) {
    54  	f, err := os.Open(cfg.Path)
    55  	if err != nil {
    56  		return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err)
    57  	}
    58  	defer f.Close()
    59  
    60  	fileMeta, err := f.Stat()
    61  	if err != nil {
    62  		return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err)
    63  	}
    64  
    65  	if fileMeta.IsDir() {
    66  		return nil, fmt.Errorf("given path is a directory: %q", cfg.Path)
    67  	}
    68  
    69  	var digests []file.Digest
    70  	if len(cfg.DigestAlgorithms) > 0 {
    71  		digests, err = intFile.NewDigestsFromFile(context.TODO(), f, cfg.DigestAlgorithms)
    72  		if err != nil {
    73  			return nil, fmt.Errorf("unable to calculate digests for file=%q: %w", cfg.Path, err)
    74  		}
    75  	}
    76  
    77  	analysisPath, cleanupFn, err := fileAnalysisPath(cfg.Path, cfg.SkipExtractArchive)
    78  	if err != nil {
    79  		return nil, fmt.Errorf("unable to extract file analysis path=%q: %w", cfg.Path, err)
    80  	}
    81  
    82  	id, versionDigest := deriveIDFromFile(cfg)
    83  
    84  	return &fileSource{
    85  		id:               id,
    86  		config:           cfg,
    87  		mutex:            &sync.Mutex{},
    88  		closer:           cleanupFn,
    89  		analysisPath:     analysisPath,
    90  		digestForVersion: versionDigest,
    91  		digests:          digests,
    92  		mimeType:         stereoFile.MIMEType(f),
    93  	}, nil
    94  }
    95  
// ID returns the artifact ID stored on the source (set by New from deriveIDFromFile).
func (s fileSource) ID() artifact.ID {
	return s.id
}
    99  
   100  func (s fileSource) Describe() source.Description {
   101  	name := path.Base(s.config.Path)
   102  	version := s.digestForVersion
   103  	supplier := ""
   104  	if !s.config.Alias.IsEmpty() {
   105  		a := s.config.Alias
   106  		if a.Name != "" {
   107  			name = a.Name
   108  		}
   109  
   110  		if a.Version != "" {
   111  			version = a.Version
   112  		}
   113  
   114  		if a.Supplier != "" {
   115  			supplier = a.Supplier
   116  		}
   117  	}
   118  	return source.Description{
   119  		ID:       string(s.id),
   120  		Name:     name,
   121  		Version:  version,
   122  		Supplier: supplier,
   123  		Metadata: source.FileMetadata{
   124  			Path:     s.config.Path,
   125  			Digests:  s.digests,
   126  			MIMEType: s.mimeType,
   127  		},
   128  	}
   129  }
   130  
   131  func (s fileSource) FileResolver(_ source.Scope) (file.Resolver, error) {
   132  	s.mutex.Lock()
   133  	defer s.mutex.Unlock()
   134  
   135  	if s.resolver != nil {
   136  		return s.resolver, nil
   137  	}
   138  
   139  	exclusionFunctions, err := directorysource.GetDirectoryExclusionFunctions(s.analysisPath, s.config.Exclude.Paths)
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  
   144  	fi, err := os.Stat(s.analysisPath)
   145  	if err != nil {
   146  		return nil, fmt.Errorf("unable to stat path=%q: %w", s.analysisPath, err)
   147  	}
   148  
   149  	if isArchiveAnalysis := fi.IsDir(); isArchiveAnalysis {
   150  		// this is an analysis of an archive file... we should scan the directory where the archive contents
   151  		res, err := fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...)
   152  		if err != nil {
   153  			return nil, fmt.Errorf("unable to create directory resolver: %w", err)
   154  		}
   155  
   156  		s.resolver = res
   157  		return s.resolver, nil
   158  	}
   159  
   160  	// This is analysis of a single file. Use file indexer.
   161  	res, err := fileresolver.NewFromFile(s.analysisPath, exclusionFunctions...)
   162  	if err != nil {
   163  		return nil, fmt.Errorf("unable to create file resolver: %w", err)
   164  	}
   165  
   166  	s.resolver = res
   167  	return s.resolver, nil
   168  }
   169  
   170  func (s *fileSource) Close() error {
   171  	s.mutex.Lock()
   172  	defer s.mutex.Unlock()
   173  
   174  	if s.closer == nil {
   175  		return nil
   176  	}
   177  
   178  	s.resolver = nil
   179  	return s.closer()
   180  }
   181  
   182  // deriveIDFromFile derives an artifact ID from the contents of a file. If an alias is provided, it will be included
   183  // in the ID derivation (along with contents). This way if the user scans the same item but is considered to be
   184  // logically different, then ID will express that.
   185  func deriveIDFromFile(cfg Config) (artifact.ID, string) {
   186  	d := digestOfFileContents(cfg.Path)
   187  	info := d
   188  
   189  	if !cfg.Alias.IsEmpty() {
   190  		// if the user provided an alias, we want to consider that in the artifact ID. This way if the user
   191  		// scans the same item but is considered to be logically different, then ID will express that.
   192  		info += fmt.Sprintf(":%s@%s", cfg.Alias.Name, cfg.Alias.Version)
   193  	}
   194  
   195  	return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String()), d
   196  }
   197  
   198  // fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive
   199  // contents have been made available. A cleanup function is provided for any temp files created (if any).
   200  // Users can disable unpacking archives, allowing individual cataloguers to extract them instead (where
   201  // supported)
   202  func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() error, error) {
   203  	var cleanupFn = func() error { return nil }
   204  	var analysisPath = path
   205  
   206  	if skipExtractArchive {
   207  		return analysisPath, cleanupFn, nil
   208  	}
   209  
   210  	envelopedUnarchiver, _, err := intFile.IdentifyArchive(context.Background(), path, nil)
   211  	if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
   212  		analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
   213  		if err != nil {
   214  			return "", nil, fmt.Errorf("unable to unarchive source file: %w", err)
   215  		}
   216  
   217  		log.Debugf("source path is an archive")
   218  	}
   219  
   220  	return analysisPath, cleanupFn, nil
   221  }
   222  
   223  func digestOfFileContents(path string) string {
   224  	f, err := os.Open(path)
   225  	if err != nil {
   226  		return digest.SHA256.FromString(path).String()
   227  	}
   228  	defer f.Close()
   229  
   230  	di, err := digest.SHA256.FromReader(f)
   231  	if err != nil {
   232  		return digest.SHA256.FromString(path).String()
   233  	}
   234  
   235  	return di.String()
   236  }
   237  
   238  func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) {
   239  	var cleanupFn = func() error { return nil }
   240  	archive, err := os.Open(path)
   241  	if err != nil {
   242  		return "", cleanupFn, fmt.Errorf("unable to open archive: %v", err)
   243  	}
   244  	defer archive.Close()
   245  
   246  	tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
   247  	if err != nil {
   248  		return "", cleanupFn, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
   249  	}
   250  
   251  	visitor := func(_ context.Context, file archives.FileInfo) error {
   252  		// Protect against symlink attacks by ensuring path doesn't escape tempDir
   253  		destPath, err := intFile.SafeJoin(tempDir, file.NameInArchive)
   254  		if err != nil {
   255  			return err
   256  		}
   257  
   258  		if file.IsDir() {
   259  			return os.MkdirAll(destPath, file.Mode())
   260  		}
   261  
   262  		if err = os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil {
   263  			return fmt.Errorf("failed to create parent directory: %w", err)
   264  		}
   265  
   266  		rc, err := file.Open()
   267  		if err != nil {
   268  			return fmt.Errorf("failed to open file in archive: %w", err)
   269  		}
   270  		defer rc.Close()
   271  
   272  		destFile, err := os.Create(destPath)
   273  		if err != nil {
   274  			return fmt.Errorf("failed to create file in destination: %w", err)
   275  		}
   276  		defer destFile.Close()
   277  
   278  		if err := destFile.Chmod(file.Mode()); err != nil {
   279  			return fmt.Errorf("failed to change mode of destination file: %w", err)
   280  		}
   281  
   282  		if _, err := io.Copy(destFile, rc); err != nil {
   283  			return fmt.Errorf("failed to copy file contents: %w", err)
   284  		}
   285  
   286  		return nil
   287  	}
   288  
   289  	return tempDir, func() error {
   290  		return os.RemoveAll(tempDir)
   291  	}, unarchiver.Extract(context.Background(), archive, visitor)
   292  }