github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/source/filesource/file_source.go (about)

     1  package filesource
     2  
     3  import (
     4  	"crypto"
     5  	"fmt"
     6  	"io/fs"
     7  	"os"
     8  	"path"
     9  	"path/filepath"
    10  	"sync"
    11  
    12  	"github.com/mholt/archiver/v3"
    13  	"github.com/opencontainers/go-digest"
    14  
    15  	stereoFile "github.com/anchore/stereoscope/pkg/file"
    16  	intFile "github.com/anchore/syft/internal/file"
    17  	"github.com/anchore/syft/internal/log"
    18  	"github.com/anchore/syft/syft/artifact"
    19  	"github.com/anchore/syft/syft/file"
    20  	"github.com/anchore/syft/syft/internal/fileresolver"
    21  	"github.com/anchore/syft/syft/source"
    22  	"github.com/anchore/syft/syft/source/directorysource"
    23  	"github.com/anchore/syft/syft/source/internal"
    24  )
    25  
    26  var _ source.Source = (*fileSource)(nil)
    27  
    28  type Config struct {
    29  	Path             string
    30  	Exclude          source.ExcludeConfig
    31  	DigestAlgorithms []crypto.Hash
    32  	Alias            source.Alias
    33  }
    34  
    35  type fileSource struct {
    36  	id               artifact.ID
    37  	digestForVersion string
    38  	config           Config
    39  	resolver         *fileresolver.Directory
    40  	mutex            *sync.Mutex
    41  	closer           func() error
    42  	digests          []file.Digest
    43  	mimeType         string
    44  	analysisPath     string
    45  }
    46  
    47  func NewFromPath(path string) (source.Source, error) {
    48  	return New(Config{Path: path})
    49  }
    50  
    51  func New(cfg Config) (source.Source, error) {
    52  	fileMeta, err := os.Stat(cfg.Path)
    53  	if err != nil {
    54  		return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err)
    55  	}
    56  
    57  	if fileMeta.IsDir() {
    58  		return nil, fmt.Errorf("given path is a directory: %q", cfg.Path)
    59  	}
    60  
    61  	analysisPath, cleanupFn := fileAnalysisPath(cfg.Path)
    62  
    63  	var digests []file.Digest
    64  	if len(cfg.DigestAlgorithms) > 0 {
    65  		fh, err := os.Open(cfg.Path)
    66  		if err != nil {
    67  			return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err)
    68  		}
    69  
    70  		defer fh.Close()
    71  
    72  		digests, err = intFile.NewDigestsFromFile(fh, cfg.DigestAlgorithms)
    73  		if err != nil {
    74  			return nil, fmt.Errorf("unable to calculate digests for file=%q: %w", cfg.Path, err)
    75  		}
    76  	}
    77  
    78  	fh, err := os.Open(cfg.Path)
    79  	if err != nil {
    80  		return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err)
    81  	}
    82  
    83  	defer fh.Close()
    84  
    85  	id, versionDigest := deriveIDFromFile(cfg)
    86  
    87  	return &fileSource{
    88  		id:               id,
    89  		config:           cfg,
    90  		mutex:            &sync.Mutex{},
    91  		closer:           cleanupFn,
    92  		analysisPath:     analysisPath,
    93  		digestForVersion: versionDigest,
    94  		digests:          digests,
    95  		mimeType:         stereoFile.MIMEType(fh),
    96  	}, nil
    97  }
    98  
    99  // deriveIDFromFile derives an artifact ID from the contents of a file. If an alias is provided, it will be included
   100  // in the ID derivation (along with contents). This way if the user scans the same item but is considered to be
   101  // logically different, then ID will express that.
   102  func deriveIDFromFile(cfg Config) (artifact.ID, string) {
   103  	d := digestOfFileContents(cfg.Path)
   104  	info := d
   105  
   106  	if !cfg.Alias.IsEmpty() {
   107  		// if the user provided an alias, we want to consider that in the artifact ID. This way if the user
   108  		// scans the same item but is considered to be logically different, then ID will express that.
   109  		info += fmt.Sprintf(":%s@%s", cfg.Alias.Name, cfg.Alias.Version)
   110  	}
   111  
   112  	return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String()), d
   113  }
   114  
   115  func (s fileSource) ID() artifact.ID {
   116  	return s.id
   117  }
   118  
   119  func (s fileSource) Describe() source.Description {
   120  	name := path.Base(s.config.Path)
   121  	version := s.digestForVersion
   122  	if !s.config.Alias.IsEmpty() {
   123  		a := s.config.Alias
   124  		if a.Name != "" {
   125  			name = a.Name
   126  		}
   127  
   128  		if a.Version != "" {
   129  			version = a.Version
   130  		}
   131  	}
   132  	return source.Description{
   133  		ID:      string(s.id),
   134  		Name:    name,
   135  		Version: version,
   136  		Metadata: source.FileMetadata{
   137  			Path:     s.config.Path,
   138  			Digests:  s.digests,
   139  			MIMEType: s.mimeType,
   140  		},
   141  	}
   142  }
   143  
   144  func (s fileSource) FileResolver(_ source.Scope) (file.Resolver, error) {
   145  	s.mutex.Lock()
   146  	defer s.mutex.Unlock()
   147  
   148  	if s.resolver != nil {
   149  		return s.resolver, nil
   150  	}
   151  
   152  	exclusionFunctions, err := directorysource.GetDirectoryExclusionFunctions(s.analysisPath, s.config.Exclude.Paths)
   153  	if err != nil {
   154  		return nil, err
   155  	}
   156  
   157  	fi, err := os.Stat(s.analysisPath)
   158  	if err != nil {
   159  		return nil, fmt.Errorf("unable to stat path=%q: %w", s.analysisPath, err)
   160  	}
   161  	isArchiveAnalysis := fi.IsDir()
   162  
   163  	absParentDir, err := absoluteSymlinkFreePathToParent(s.analysisPath)
   164  	if err != nil {
   165  		return nil, err
   166  	}
   167  
   168  	var res *fileresolver.Directory
   169  	if isArchiveAnalysis {
   170  		// this is an analysis of an archive file... we should scan the directory where the archive contents
   171  		res, err = fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...)
   172  		if err != nil {
   173  			return nil, fmt.Errorf("unable to create directory resolver: %w", err)
   174  		}
   175  	} else {
   176  		// this is an analysis of a single file. We want to ultimately scan the directory that the file is in, but we
   177  		// don't want to include any other files except this the given file.
   178  		exclusionFunctions = append([]fileresolver.PathIndexVisitor{
   179  
   180  			// note: we should exclude these kinds of paths first before considering any other user-provided exclusions
   181  			func(_, p string, _ os.FileInfo, _ error) error {
   182  				if p == absParentDir {
   183  					// this is the root directory... always include it
   184  					return nil
   185  				}
   186  
   187  				if filepath.Dir(p) != absParentDir {
   188  					// we are no longer in the root directory containing the single file we want to scan...
   189  					// we should skip the directory this path resides in entirely!
   190  					return fs.SkipDir
   191  				}
   192  
   193  				if filepath.Base(p) != filepath.Base(s.config.Path) {
   194  					// we're in the root directory, but this is not the file we want to scan...
   195  					// we should selectively skip this file (not the directory we're in).
   196  					return fileresolver.ErrSkipPath
   197  				}
   198  				return nil
   199  			},
   200  		}, exclusionFunctions...)
   201  
   202  		res, err = fileresolver.NewFromDirectory(absParentDir, absParentDir, exclusionFunctions...)
   203  		if err != nil {
   204  			return nil, fmt.Errorf("unable to create directory resolver: %w", err)
   205  		}
   206  	}
   207  
   208  	s.resolver = res
   209  
   210  	return s.resolver, nil
   211  }
   212  
   213  func absoluteSymlinkFreePathToParent(path string) (string, error) {
   214  	absAnalysisPath, err := filepath.Abs(path)
   215  	if err != nil {
   216  		return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err)
   217  	}
   218  	dereferencedAbsAnalysisPath, err := filepath.EvalSymlinks(absAnalysisPath)
   219  	if err != nil {
   220  		return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err)
   221  	}
   222  	return filepath.Dir(dereferencedAbsAnalysisPath), nil
   223  }
   224  
   225  func (s *fileSource) Close() error {
   226  	if s.closer == nil {
   227  		return nil
   228  	}
   229  	s.resolver = nil
   230  	return s.closer()
   231  }
   232  
   233  // fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive
   234  // contents have been made available. A cleanup function is provided for any temp files created (if any).
   235  func fileAnalysisPath(path string) (string, func() error) {
   236  	var analysisPath = path
   237  	var cleanupFn = func() error { return nil }
   238  
   239  	// if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and
   240  	// use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is
   241  	// unarchived.
   242  	envelopedUnarchiver, err := archiver.ByExtension(path)
   243  	if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok {
   244  		if tar, ok := unarchiver.(*archiver.Tar); ok {
   245  			// when tar files are extracted, if there are multiple entries at the same
   246  			// location, the last entry wins
   247  			// NOTE: this currently does not display any messages if an overwrite happens
   248  			tar.OverwriteExisting = true
   249  		}
   250  		unarchivedPath, tmpCleanup, err := unarchiveToTmp(path, unarchiver)
   251  		if err != nil {
   252  			log.Warnf("file could not be unarchived: %+v", err)
   253  		} else {
   254  			log.Debugf("source path is an archive")
   255  			analysisPath = unarchivedPath
   256  		}
   257  		if tmpCleanup != nil {
   258  			cleanupFn = tmpCleanup
   259  		}
   260  	}
   261  
   262  	return analysisPath, cleanupFn
   263  }
   264  
   265  func digestOfFileContents(path string) string {
   266  	file, err := os.Open(path)
   267  	if err != nil {
   268  		return digest.SHA256.FromString(path).String()
   269  	}
   270  	defer file.Close()
   271  	di, err := digest.SHA256.FromReader(file)
   272  	if err != nil {
   273  		return digest.SHA256.FromString(path).String()
   274  	}
   275  	return di.String()
   276  }
   277  
   278  func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) {
   279  	tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
   280  	if err != nil {
   281  		return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
   282  	}
   283  
   284  	cleanupFn := func() error {
   285  		return os.RemoveAll(tempDir)
   286  	}
   287  
   288  	return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir)
   289  }