github.com/anchore/syft@v1.38.2/syft/internal/fileresolver/file_indexer.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path/filepath"
     7  
     8  	"github.com/wagoodman/go-progress"
     9  
    10  	"github.com/anchore/stereoscope/pkg/file"
    11  	"github.com/anchore/stereoscope/pkg/filetree"
    12  	"github.com/anchore/syft/internal/bus"
    13  	"github.com/anchore/syft/internal/log"
    14  	"github.com/anchore/syft/syft/internal/windows"
    15  )
    16  
    17  type fileIndexer struct {
    18  	path              string
    19  	base              string
    20  	pathIndexVisitors []PathIndexVisitor
    21  	errPaths          map[string]error
    22  	tree              filetree.ReadWriter
    23  	index             filetree.Index
    24  }
    25  
    26  func newFileIndexer(path, base string, visitors ...PathIndexVisitor) *fileIndexer {
    27  	i := &fileIndexer{
    28  		path:  path,
    29  		base:  base,
    30  		tree:  filetree.New(),
    31  		index: filetree.NewIndex(),
    32  		pathIndexVisitors: append(
    33  			[]PathIndexVisitor{
    34  				requireFileInfo,
    35  				disallowByFileType,
    36  				skipPathsByMountTypeAndName(path),
    37  			},
    38  			visitors...,
    39  		),
    40  		errPaths: make(map[string]error),
    41  	}
    42  
    43  	return i
    44  }
    45  
    46  // Build the indexer
    47  func (r *fileIndexer) build() (filetree.Reader, filetree.IndexReader, error) {
    48  	return r.tree, r.index, index(r.path, r.indexPath)
    49  }
    50  
    51  // Index file at the given path
    52  // A file indexer simply indexes the file and its directory.
    53  func index(path string, indexer func(string, *progress.AtomicStage) error) error {
    54  	// We want to index the file at the provided path and its parent directory.
    55  	// We need to probably check that we have file access
    56  	// We also need to determine what to do when the file itself is a symlink.
    57  	prog := bus.StartIndexingFiles(path)
    58  	defer prog.SetCompleted()
    59  
    60  	err := indexer(path, prog.AtomicStage)
    61  	if err != nil {
    62  		return fmt.Errorf("unable to index filesystem path=%q: %w", path, err)
    63  	}
    64  
    65  	return nil
    66  }
    67  
    68  // indexPath will index the file at the provided path as well as its parent directory.
    69  // It expects path to be a file, not a directory.
    70  // If a directory is provided then an error will be returned. Additionally, any IO or
    71  // permissions errors on the file at path or its parent directory will return an error.
    72  // Filter functions provided to the indexer are honoured, so if the path provided (or its parent
    73  // directory) is filtered by a filter function, an error is returned.
    74  func (r *fileIndexer) indexPath(path string, stager *progress.AtomicStage) error {
    75  	log.WithFields("path", path).Trace("indexing file path")
    76  
    77  	absPath, err := filepath.Abs(path)
    78  	if err != nil {
    79  		return err
    80  	}
    81  
    82  	// Protect against callers trying to call file_indexer with directories
    83  	fi, err := os.Stat(absPath)
    84  	// The directory indexer ignores stat errors, however this file indexer won't ignore them
    85  	if err != nil {
    86  		return fmt.Errorf("unable to stat path=%q: %w", path, err)
    87  	}
    88  	if fi.IsDir() {
    89  		return fmt.Errorf("unable to index file, given path was a directory=%q", path)
    90  	}
    91  
    92  	absSymlinkFreeFilePath, err := absoluteSymlinkFreePathToFile(path)
    93  	if err != nil {
    94  		return err
    95  	}
    96  
    97  	// Now index the file and its parent directory
    98  	// We try to index the parent directory first, because if the parent directory
    99  	// is ignored by any filter function, then we must ensure we also ignore the file.
   100  	absSymlinkFreeParent, err := absoluteSymlinkFreePathToParent(absSymlinkFreeFilePath)
   101  	if err != nil {
   102  		return err
   103  	}
   104  	parentFi, err := os.Stat(absSymlinkFreeParent)
   105  	if err != nil {
   106  		return fmt.Errorf("unable to stat parent of file=%q: %w", absSymlinkFreeParent, err)
   107  	}
   108  
   109  	stager.Set(absSymlinkFreeParent)
   110  	indexParentErr := r.filterAndIndex(absSymlinkFreeParent, parentFi)
   111  	if indexParentErr != nil {
   112  		return indexParentErr
   113  	}
   114  
   115  	// We have indexed the parent successfully, now attempt to index the file.
   116  	stager.Set(absSymlinkFreeFilePath)
   117  	indexFileErr := r.filterAndIndex(absSymlinkFreeFilePath, fi)
   118  	if indexFileErr != nil {
   119  		return indexFileErr
   120  	}
   121  
   122  	return nil
   123  }
   124  
   125  func (r *fileIndexer) filterAndIndex(path string, info os.FileInfo) error {
   126  	// check if any of the filters want us to ignore this path
   127  	for _, filterFn := range r.pathIndexVisitors {
   128  		if filterFn == nil {
   129  			continue
   130  		}
   131  
   132  		if filterErr := filterFn(r.base, path, info, nil); filterErr != nil {
   133  			// A filter function wants us to ignore this path, honour it
   134  			return filterErr
   135  		}
   136  	}
   137  
   138  	// here we check to see if we need to normalize paths to posix on the way in coming from windows
   139  	if windows.HostRunningOnWindows() {
   140  		path = windows.ToPosix(path)
   141  	}
   142  
   143  	err := r.addPathToIndex(path, info)
   144  	// If we hit file access errors, isFileAccessErr will handle logging & adding
   145  	// the path to the errPaths map.
   146  	// While the directory_indexer does not let these cause the indexer to throw
   147  	// we will here, as not having access to the file we index for a file source
   148  	// probably makes the file source creation useless? I need to check with Syft maintainers.
   149  	// This also poses the question, is errPaths worthwhile for file_indexer?
   150  	if r.isFileAccessErr(path, err) {
   151  		return err
   152  	}
   153  
   154  	return nil
   155  }
   156  
   157  // Add path to index. File indexer doesn't need to support symlink, as we should have abs symlink free path.
   158  // If we somehow get a symlink here, report as an error.
   159  func (r *fileIndexer) addPathToIndex(path string, info os.FileInfo) error {
   160  	switch t := file.TypeFromMode(info.Mode()); t {
   161  	case file.TypeDirectory:
   162  		return r.addDirectoryToIndex(path, info)
   163  	case file.TypeRegular:
   164  		return r.addFileToIndex(path, info)
   165  	default:
   166  		return fmt.Errorf("unsupported file type: %s", t)
   167  	}
   168  }
   169  
   170  func (r *fileIndexer) addDirectoryToIndex(path string, info os.FileInfo) error {
   171  	ref, err := r.tree.AddDir(file.Path(path))
   172  	if err != nil {
   173  		return err
   174  	}
   175  
   176  	metadata := NewMetadataFromPath(path, info)
   177  	r.index.Add(*ref, metadata)
   178  
   179  	return nil
   180  }
   181  
   182  func (r *fileIndexer) addFileToIndex(path string, info os.FileInfo) error {
   183  	ref, err := r.tree.AddFile(file.Path(path))
   184  	if err != nil {
   185  		return err
   186  	}
   187  
   188  	metadata := NewMetadataFromPath(path, info)
   189  	r.index.Add(*ref, metadata)
   190  
   191  	return nil
   192  }
   193  
   194  // Get absolute symlink free path to parent of the file
   195  func absoluteSymlinkFreePathToParent(path string) (string, error) {
   196  	absFilePath, err := absoluteSymlinkFreePathToFile(path)
   197  	if err != nil {
   198  		return "", err
   199  	}
   200  
   201  	return filepath.Dir(absFilePath), nil
   202  }
   203  
   204  // Get absolute symlink free path to the file
   205  func absoluteSymlinkFreePathToFile(path string) (string, error) {
   206  	absAnalysisPath, err := filepath.Abs(path)
   207  	if err != nil {
   208  		return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err)
   209  	}
   210  	dereferencedAbsAnalysisPath, err := filepath.EvalSymlinks(absAnalysisPath)
   211  	if err != nil {
   212  		return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err)
   213  	}
   214  	return dereferencedAbsAnalysisPath, nil
   215  }
   216  
   217  func (r *fileIndexer) isFileAccessErr(path string, err error) bool {
   218  	// don't allow for errors to stop indexing, keep track of the paths and continue.
   219  	if err != nil {
   220  		log.Warnf("unable to access path=%q: %+v", path, err)
   221  		r.errPaths[path] = err
   222  		return true
   223  	}
   224  	return false
   225  }