github.com/anchore/syft@v1.38.2/syft/internal/fileresolver/directory_indexer.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io/fs"
     7  	"os"
     8  	"path"
     9  	"path/filepath"
    10  	"strings"
    11  
    12  	"github.com/wagoodman/go-progress"
    13  
    14  	"github.com/anchore/stereoscope/pkg/file"
    15  	"github.com/anchore/stereoscope/pkg/filetree"
    16  	"github.com/anchore/syft/internal/bus"
    17  	"github.com/anchore/syft/internal/log"
    18  	"github.com/anchore/syft/syft/internal/windows"
    19  )
    20  
// PathIndexVisitor is a filter hook consulted by the indexer for every path before that path is added to
// the index. It is called with the indexer's base, the path under consideration, the file info for that
// path (which may be nil), and any error already observed for the path.
//
// Returning a non-nil error keeps the path out of the index. If the returned error matches fs.SkipDir it
// is additionally handed back to the caller of indexPath so the walk can skip the directory entirely.
type PathIndexVisitor func(string, string, os.FileInfo, error) error
    22  
// directoryIndexer records the paths found under a starting path into a file tree and a companion
// metadata index.
type directoryIndexer struct {
	// starting path handed to indexAllRoots by build()
	path              string
	// passed as the first argument to every visitor and used as the prefix when resolving symlink targets
	base              string
	// filters run in order by indexPath; the first one returning an error keeps the path out of the index
	pathIndexVisitors []PathIndexVisitor
	// paths that could not be accessed or indexed, keyed by path, with the reason
	errPaths          map[string]error
	// tree of every directory, file, and symlink added so far
	tree              filetree.ReadWriter
	// metadata for each reference added to the tree
	index             filetree.Index
}
    31  
    32  func newDirectoryIndexer(path, base string, visitors ...PathIndexVisitor) *directoryIndexer {
    33  	i := &directoryIndexer{
    34  		path:  path,
    35  		base:  base,
    36  		tree:  filetree.New(),
    37  		index: filetree.NewIndex(),
    38  		pathIndexVisitors: append(
    39  			[]PathIndexVisitor{
    40  				requireFileInfo,
    41  				disallowByFileType,
    42  				skipPathsByMountTypeAndName(path),
    43  			},
    44  			visitors...,
    45  		),
    46  		errPaths: make(map[string]error),
    47  	}
    48  
    49  	// these additional stateful visitors should be the first thing considered when walking / indexing
    50  	i.pathIndexVisitors = append(
    51  		[]PathIndexVisitor{
    52  			i.disallowRevisitingVisitor,
    53  			i.disallowFileAccessErr,
    54  		},
    55  		i.pathIndexVisitors...,
    56  	)
    57  
    58  	return i
    59  }
    60  
    61  func (r *directoryIndexer) build() (filetree.Reader, filetree.IndexReader, error) {
    62  	return r.tree, r.index, indexAllRoots(r.path, r.indexTree)
    63  }
    64  
    65  func indexAllRoots(root string, indexer func(string, *progress.AtomicStage) ([]string, error)) error {
    66  	// why account for multiple roots? To cover cases when there is a symlink that references above the root path,
    67  	// in which case we need to additionally index where the link resolves to. it's for this reason why the filetree
    68  	// must be relative to the root of the filesystem (and not just relative to the given path).
    69  	pathsToIndex := []string{root}
    70  	fullPathsMap := map[string]struct{}{}
    71  
    72  	prog := bus.StartIndexingFiles(root)
    73  	defer prog.SetCompleted()
    74  loop:
    75  	for {
    76  		var currentPath string
    77  		switch len(pathsToIndex) {
    78  		case 0:
    79  			break loop
    80  		case 1:
    81  			currentPath, pathsToIndex = pathsToIndex[0], nil
    82  		default:
    83  			currentPath, pathsToIndex = pathsToIndex[0], pathsToIndex[1:]
    84  		}
    85  
    86  		additionalRoots, err := indexer(currentPath, prog.AtomicStage)
    87  		if err != nil {
    88  			return fmt.Errorf("unable to index filesystem path=%q: %w", currentPath, err)
    89  		}
    90  
    91  		for _, newRoot := range additionalRoots {
    92  			if _, ok := fullPathsMap[newRoot]; !ok {
    93  				fullPathsMap[newRoot] = struct{}{}
    94  				pathsToIndex = append(pathsToIndex, newRoot)
    95  			}
    96  		}
    97  	}
    98  
    99  	return nil
   100  }
   101  
   102  func (r *directoryIndexer) indexTree(root string, stager *progress.AtomicStage) ([]string, error) {
   103  	log.WithFields("path", root).Trace("indexing filetree")
   104  
   105  	var roots []string
   106  	var err error
   107  
   108  	root, err = filepath.Abs(root)
   109  	if err != nil {
   110  		return nil, err
   111  	}
   112  
   113  	// we want to be able to index single files with the directory resolver. However, we should also allow for attempting
   114  	// to index paths that do not exist (that is, a root that does not exist is not an error case that should stop indexing).
   115  	// For this reason we look for an opportunity to discover if the given root is a file, and if so add a single root,
   116  	// but continue forth with index regardless if the given root path exists or not.
   117  	fi, err := os.Stat(root)
   118  	if err != nil && fi != nil && !fi.IsDir() {
   119  		// note: we want to index the path regardless of an error stat-ing the path
   120  		newRoot, _ := r.indexPath(root, fi, nil)
   121  		if newRoot != "" {
   122  			roots = append(roots, newRoot)
   123  		}
   124  		return roots, nil
   125  	}
   126  
   127  	shouldIndexFullTree, err := isRealPath(root)
   128  	if err != nil {
   129  		return nil, err
   130  	}
   131  
   132  	if !shouldIndexFullTree {
   133  		newRoots, err := r.indexBranch(root, stager)
   134  		if err != nil {
   135  			return nil, fmt.Errorf("unable to index branch=%q: %w", root, err)
   136  		}
   137  
   138  		roots = append(roots, newRoots...)
   139  
   140  		return roots, nil
   141  	}
   142  
   143  	err = filepath.Walk(root,
   144  		func(path string, info os.FileInfo, err error) error {
   145  			stager.Set(path)
   146  
   147  			newRoot, err := r.indexPath(path, info, err)
   148  
   149  			if err != nil {
   150  				return err
   151  			}
   152  
   153  			if newRoot != "" {
   154  				roots = append(roots, newRoot)
   155  			}
   156  
   157  			return nil
   158  		})
   159  
   160  	if err != nil {
   161  		return nil, fmt.Errorf("unable to index root=%q: %w", root, err)
   162  	}
   163  
   164  	return roots, nil
   165  }
   166  
   167  func isRealPath(root string) (bool, error) {
   168  	rootParent := filepath.Clean(filepath.Dir(root))
   169  
   170  	realRootParent, err := filepath.EvalSymlinks(rootParent)
   171  	if err != nil {
   172  		return false, err
   173  	}
   174  
   175  	realRootParent = filepath.Clean(realRootParent)
   176  
   177  	return rootParent == realRootParent, nil
   178  }
   179  
   180  func (r *directoryIndexer) indexBranch(root string, stager *progress.AtomicStage) ([]string, error) {
   181  	rootRealPath, err := filepath.EvalSymlinks(root)
   182  	if err != nil {
   183  		var pathErr *os.PathError
   184  		if errors.As(err, &pathErr) {
   185  			// we can't index the path, but we shouldn't consider this to be fatal
   186  			// TODO: known-unknowns
   187  			log.WithFields("root", root, "error", err).Trace("unable to evaluate symlink while indexing branch")
   188  			return nil, nil
   189  		}
   190  		return nil, err
   191  	}
   192  
   193  	// there is a symlink within the path to the root, we need to index the real root parent first
   194  	// then capture the symlinks to the root path
   195  	roots, err := r.indexTree(rootRealPath, stager)
   196  	if err != nil {
   197  		return nil, fmt.Errorf("unable to index real root=%q: %w", rootRealPath, err)
   198  	}
   199  
   200  	// walk down all ancestor paths and shallow-add non-existing elements to the tree
   201  	for idx, p := range allContainedPaths(root) {
   202  		var targetPath string
   203  		if idx != 0 {
   204  			parent := path.Dir(p)
   205  			cleanParent, err := filepath.EvalSymlinks(parent)
   206  			if err != nil {
   207  				return nil, fmt.Errorf("unable to evaluate symlink for contained path parent=%q: %w", parent, err)
   208  			}
   209  			targetPath = filepath.Join(cleanParent, filepath.Base(p))
   210  		} else {
   211  			targetPath = p
   212  		}
   213  
   214  		stager.Set(targetPath)
   215  
   216  		lstat, err := os.Lstat(targetPath)
   217  		newRoot, err := r.indexPath(targetPath, lstat, err)
   218  		if err != nil && !errors.Is(err, ErrSkipPath) && !errors.Is(err, fs.SkipDir) {
   219  			return nil, fmt.Errorf("unable to index ancestor path=%q: %w", targetPath, err)
   220  		}
   221  		if newRoot != "" {
   222  			roots = append(roots, newRoot)
   223  		}
   224  	}
   225  
   226  	return roots, nil
   227  }
   228  
   229  func allContainedPaths(p string) []string {
   230  	var all []string
   231  	var currentPath string
   232  
   233  	cleanPath := strings.TrimSpace(p)
   234  
   235  	if cleanPath == "" {
   236  		return nil
   237  	}
   238  
   239  	// iterate through all parts of the path, replacing path elements with link resolutions where possible.
   240  	for idx, part := range strings.Split(filepath.Clean(cleanPath), file.DirSeparator) {
   241  		if idx == 0 && part == "" {
   242  			currentPath = file.DirSeparator
   243  			continue
   244  		}
   245  
   246  		// cumulatively gather where we are currently at and provide a rich object
   247  		currentPath = path.Join(currentPath, part)
   248  		all = append(all, currentPath)
   249  	}
   250  	return all
   251  }
   252  
   253  func (r *directoryIndexer) indexPath(givenPath string, info os.FileInfo, err error) (string, error) {
   254  	// ignore any path which a filter function returns true
   255  	for _, filterFn := range r.pathIndexVisitors {
   256  		if filterFn == nil {
   257  			continue
   258  		}
   259  
   260  		if filterErr := filterFn(r.base, givenPath, info, err); filterErr != nil {
   261  			if errors.Is(filterErr, fs.SkipDir) {
   262  				// signal to walk() to skip this directory entirely (even if we're processing a file)
   263  				return "", filterErr
   264  			}
   265  			// skip this path but don't affect walk() trajectory
   266  			return "", nil
   267  		}
   268  	}
   269  
   270  	if info == nil {
   271  		// walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue.
   272  		r.errPaths[givenPath] = fmt.Errorf("no file info observable at path=%q", givenPath)
   273  		return "", nil
   274  	}
   275  
   276  	// here we check to see if we need to normalize paths to posix on the way in coming from windows
   277  	if windows.HostRunningOnWindows() {
   278  		givenPath = windows.ToPosix(givenPath)
   279  	}
   280  
   281  	newRoot, err := r.addPathToIndex(givenPath, info)
   282  	if r.isFileAccessErr(givenPath, err) {
   283  		return "", nil
   284  	}
   285  
   286  	return newRoot, nil
   287  }
   288  
   289  func (r *directoryIndexer) disallowFileAccessErr(_, path string, _ os.FileInfo, err error) error {
   290  	if r.isFileAccessErr(path, err) {
   291  		return ErrSkipPath
   292  	}
   293  	return nil
   294  }
   295  
   296  func (r *directoryIndexer) isFileAccessErr(path string, err error) bool {
   297  	// don't allow for errors to stop indexing, keep track of the paths and continue.
   298  	if err != nil {
   299  		log.Warnf("unable to access path=%q: %+v", path, err)
   300  		r.errPaths[path] = err
   301  		return true
   302  	}
   303  	return false
   304  }
   305  
   306  func (r directoryIndexer) addPathToIndex(p string, info os.FileInfo) (string, error) {
   307  	switch t := file.TypeFromMode(info.Mode()); t {
   308  	case file.TypeSymLink:
   309  		return r.addSymlinkToIndex(p, info)
   310  	case file.TypeDirectory:
   311  		return "", r.addDirectoryToIndex(p, info)
   312  	case file.TypeRegular:
   313  		return "", r.addFileToIndex(p, info)
   314  	default:
   315  		return "", fmt.Errorf("unsupported file type: %s", t)
   316  	}
   317  }
   318  
   319  func (r directoryIndexer) addDirectoryToIndex(p string, info os.FileInfo) error {
   320  	ref, err := r.tree.AddDir(file.Path(p))
   321  	if err != nil {
   322  		return err
   323  	}
   324  
   325  	metadata := NewMetadataFromPath(p, info)
   326  	r.index.Add(*ref, metadata)
   327  
   328  	return nil
   329  }
   330  
   331  func (r directoryIndexer) addFileToIndex(p string, info os.FileInfo) error {
   332  	ref, err := r.tree.AddFile(file.Path(p))
   333  	if err != nil {
   334  		return err
   335  	}
   336  
   337  	metadata := NewMetadataFromPath(p, info)
   338  	r.index.Add(*ref, metadata)
   339  
   340  	return nil
   341  }
   342  
   343  func (r directoryIndexer) addSymlinkToIndex(p string, info os.FileInfo) (string, error) {
   344  	linkTarget, err := os.Readlink(p)
   345  	if err != nil {
   346  		isOnWindows := windows.HostRunningOnWindows()
   347  		if isOnWindows {
   348  			p = windows.FromPosix(p)
   349  		}
   350  
   351  		linkTarget, err = filepath.EvalSymlinks(p)
   352  
   353  		if isOnWindows {
   354  			p = windows.ToPosix(p)
   355  		}
   356  
   357  		if err != nil {
   358  			return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err)
   359  		}
   360  	}
   361  
   362  	if filepath.IsAbs(linkTarget) {
   363  		linkTarget = filepath.Clean(linkTarget)
   364  		// if the link is absolute (e.g, /bin/ls -> /bin/busybox) we need to
   365  		// resolve relative to the root of the base directory, if it is not already
   366  		// prefixed with a volume name
   367  		if filepath.VolumeName(linkTarget) == "" {
   368  			linkTarget = filepath.Join(r.base, filepath.Clean(linkTarget))
   369  		}
   370  	} else {
   371  		// if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to
   372  		// resolve it relative to the directory in question (e.g. resolve to
   373  		// /dev/fd/2)
   374  		if r.base == "" {
   375  			linkTarget = filepath.Join(filepath.Dir(p), linkTarget)
   376  		} else {
   377  			// if the base is set, then we first need to resolve the link,
   378  			// before finding it's location in the base
   379  			dir, err := filepath.Rel(r.base, filepath.Dir(p))
   380  			// if the relative path to the base contains "..",i.e. p is the parent or ancestor of the base
   381  			// For example:
   382  			// dir: "/root/asymlink" -> "/root/realdir" (linkTarget:"realdir")
   383  			// base: "/root/asymlink"
   384  			// so the relative path of /root to the "/root/asymlink" is ".."
   385  			// we cannot directly concatenate ".." to "/root/symlink",however,
   386  			// the parent directory of linkTarget should be "/root"
   387  			for strings.HasPrefix(dir, "..") {
   388  				if strings.HasPrefix(dir, "../") {
   389  					dir = strings.TrimPrefix(dir, "../")
   390  				} else {
   391  					dir = strings.TrimPrefix(dir, "..")
   392  				}
   393  				lastSlash := strings.LastIndex(r.base, "/")
   394  				if lastSlash != -1 {
   395  					r.base = r.base[:lastSlash]
   396  				}
   397  				// In case of the root directory
   398  				if r.base == "" {
   399  					r.base = "/"
   400  				}
   401  			}
   402  			if err != nil {
   403  				return "", fmt.Errorf("unable to resolve relative path for path=%q: %w", p, err)
   404  			}
   405  			linkTarget = filepath.Join(r.base, filepath.Clean(filepath.Join("/", dir, linkTarget)))
   406  		}
   407  	}
   408  
   409  	ref, err := r.tree.AddSymLink(file.Path(p), file.Path(linkTarget))
   410  	if err != nil {
   411  		return "", err
   412  	}
   413  
   414  	targetAbsPath := linkTarget
   415  	if !filepath.IsAbs(targetAbsPath) {
   416  		targetAbsPath = filepath.Clean(filepath.Join(path.Dir(p), linkTarget))
   417  	}
   418  
   419  	metadata := NewMetadataFromPath(p, info)
   420  	metadata.LinkDestination = linkTarget
   421  	r.index.Add(*ref, metadata)
   422  
   423  	// if the target path does not exist, then do not report it as a new root, or try to send
   424  	// syft parsing there.
   425  	if _, err := os.Stat(targetAbsPath); err != nil && errors.Is(err, os.ErrNotExist) {
   426  		log.Debugf("link %s points to unresolved path %s, ignoring target as new root", p, targetAbsPath)
   427  		targetAbsPath = ""
   428  	}
   429  
   430  	return targetAbsPath, nil
   431  }
   432  
   433  func (r directoryIndexer) hasBeenIndexed(p string) (bool, *file.Metadata) {
   434  	filePath := file.Path(p)
   435  	if !r.tree.HasPath(filePath) {
   436  		return false, nil
   437  	}
   438  
   439  	exists, ref, err := r.tree.File(filePath)
   440  	if err != nil || !exists || !ref.HasReference() {
   441  		return false, nil
   442  	}
   443  
   444  	// cases like "/" will be in the tree, but not been indexed yet (a special case). We want to capture
   445  	// these cases as new paths to index.
   446  	if !ref.HasReference() {
   447  		return false, nil
   448  	}
   449  
   450  	entry, err := r.index.Get(*ref.Reference)
   451  	if err != nil {
   452  		return false, nil
   453  	}
   454  
   455  	return true, &entry.Metadata
   456  }
   457  
   458  func (r *directoryIndexer) disallowRevisitingVisitor(_, path string, _ os.FileInfo, _ error) error {
   459  	// this prevents visiting:
   460  	// - link destinations twice, once for the real file and another through the virtual path
   461  	// - infinite link cycles
   462  	if indexed, metadata := r.hasBeenIndexed(path); indexed {
   463  		if metadata.IsDir() {
   464  			// signal to walk() that we should skip this directory entirely
   465  			return fs.SkipDir
   466  		}
   467  		return ErrSkipPath
   468  	}
   469  	return nil
   470  }
   471  
   472  func disallowByFileType(_, _ string, info os.FileInfo, _ error) error {
   473  	if info == nil {
   474  		// we can't filter out by filetype for non-existent files
   475  		return nil
   476  	}
   477  	switch file.TypeFromMode(info.Mode()) {
   478  	case file.TypeCharacterDevice, file.TypeSocket, file.TypeBlockDevice, file.TypeFIFO, file.TypeIrregular:
   479  		return ErrSkipPath
   480  		// note: symlinks that point to these files may still get by.
   481  		// We handle this later in processing to help prevent against infinite links traversal.
   482  	}
   483  
   484  	return nil
   485  }
   486  
   487  func requireFileInfo(_, _ string, info os.FileInfo, _ error) error {
   488  	if info == nil {
   489  		return ErrSkipPath
   490  	}
   491  	return nil
   492  }