github.com/anchore/syft@v1.38.2/syft/internal/fileresolver/path_skipper.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"io/fs"
     5  	"os"
     6  	"sort"
     7  	"strings"
     8  
     9  	"github.com/moby/sys/mountinfo"
    10  
    11  	"github.com/anchore/syft/internal/log"
    12  )
    13  
// pathSkipper decides, based on the system mount table, which paths should be left out of a filesystem scan.
// Its pathIndexVisitor method is the entry point used while walking paths.
type pathSkipper struct {
	// scanTarget is the root path that is being scanned (without any base-path logic applied).
	scanTarget string

	// ignorableMountTypes is a set of mount types that should be ignored, keyed by filesystem type. Optionally a
	// list of paths (the map values) can be provided that this mount type should be ignored at.  For example in
	// some containers /dev is mounted as a tmpfs and should be ignored, but /tmp should not be ignored. An empty
	// (or nil) list of paths means that paths within the mount type should always be ignored.
	ignorableMountTypes map[string][]string

	// mounts holds the mount entries for the current system. newPathSkipperFromMounts stores them ordered from
	// the longest mount point to the shortest, so the first entry containing a path is the most specific one.
	mounts []*mountinfo.Info
	// mountsByType holds the same mount entries grouped by their filesystem type.
	mountsByType map[string][]*mountinfo.Info
}
    28  
    29  // skipPathsByMountTypeAndName accepts the root path and returns a PathIndexVisitor that will skip paths based
    30  // the filesystem type, the mountpoint, and configured blocklist paths for each filesystem type.
    31  // This will help syft dodge filesystem topologies that have the potential to make the search space much bigger in
    32  // areas known to not traditionally contain files of interest (installed software).  It is meant to allow scanning
    33  // "/" on a unix host to succeed, while also not causing any files in a narrow directory scan to be skipped unnecessarily.
    34  func skipPathsByMountTypeAndName(root string) PathIndexVisitor {
    35  	infos, err := mountinfo.GetMounts(nil)
    36  	if err != nil {
    37  		log.WithFields("error", err).Debug("unable to get system mounts")
    38  		return func(_ string, _ string, _ os.FileInfo, _ error) error {
    39  			return nil
    40  		}
    41  	}
    42  
    43  	return newPathSkipperFromMounts(root, infos).pathIndexVisitor
    44  }
    45  
    46  func newPathSkipperFromMounts(root string, infos []*mountinfo.Info) pathSkipper {
    47  	// we're only interested in ignoring the logical filesystems typically found at these mount points:
    48  	// - /proc
    49  	//     - procfs
    50  	//     - proc
    51  	// - /sys
    52  	//     - sysfs
    53  	// - /dev
    54  	//     - devfs - BSD/darwin flavored systems and old linux systems
    55  	//     - devtmpfs - driver core maintained /dev tmpfs
    56  	//     - udev - userspace implementation that replaced devfs
    57  	//     - tmpfs - used for /dev in special instances (within a container)
    58  	ignorableMountTypes := map[string][]string{
    59  		"proc":     nil,
    60  		"procfs":   nil,
    61  		"sysfs":    nil,
    62  		"devfs":    nil,
    63  		"devtmpfs": nil,
    64  		"udev":     nil,
    65  		// note: there should be no order required (e.g. search /sys/thing before /sys) since that would imply that
    66  		// we could not ignore a nested path within a path that would be ignored anyway.
    67  		"tmpfs": {"/run", "/dev", "/var/run", "/var/lock", "/sys"},
    68  	}
    69  
    70  	// The longest path is the most specific path, e.g.
    71  	// if / is mounted as tmpfs, but /home/syft/permanent is mounted as ext4,
    72  	// then the mount type for /home/syft/permanent/foo is ext4, and the mount info
    73  	// stating that /home/syft/permanent is ext4 has the longer mount point.
    74  	sort.Slice(infos, func(i, j int) bool {
    75  		return len(infos[i].Mountpoint) > len(infos[j].Mountpoint)
    76  	})
    77  
    78  	mountsByType := make(map[string][]*mountinfo.Info)
    79  
    80  	for _, mi := range infos {
    81  		mountsByType[mi.FSType] = append(mountsByType[mi.FSType], mi)
    82  	}
    83  
    84  	return pathSkipper{
    85  		scanTarget:          root,
    86  		ignorableMountTypes: ignorableMountTypes,
    87  		mounts:              infos,
    88  		mountsByType:        mountsByType,
    89  	}
    90  }
    91  
    92  func (ps pathSkipper) pathIndexVisitor(_ string, givenPath string, _ os.FileInfo, _ error) error {
    93  	for _, mi := range ps.mounts {
    94  		conditionalPaths, ignorable := ps.ignorableMountTypes[mi.FSType]
    95  
    96  		// Rule 0: Make sure the given path is within the mount point; if not let the scan continue
    97  		if !containsPath(givenPath, mi.Mountpoint) {
    98  			continue
    99  		}
   100  
   101  		// Rule 1: ignore any path within a mount point that is of the given filesystem type unconditionally
   102  		if len(conditionalPaths) == 0 {
   103  			if !ignorable {
   104  				// we've matched on the most specific path at this point, which means we should stop searching
   105  				// mount points for this path
   106  				break
   107  			}
   108  
   109  			log.WithFields(
   110  				"path", givenPath,
   111  				"mountpoint", mi.Mountpoint,
   112  				"fs", mi.FSType,
   113  			).Debug("ignoring path based on mountpoint filesystem type")
   114  
   115  			return fs.SkipDir
   116  		}
   117  
   118  		// Rule 2: ignore any path within a mount point that is of the given filesystem type, only if
   119  		// the path is on a known blocklist of paths for that filesystem type.
   120  		// For example: /dev can be mounted as a tmpfs, which should always be skipped.
   121  		for _, conditionalPath := range conditionalPaths {
   122  			if !containsPath(givenPath, conditionalPath) {
   123  				continue
   124  			}
   125  
   126  			log.WithFields(
   127  				"path", givenPath,
   128  				"mountpoint", mi.Mountpoint,
   129  				"fs", mi.FSType,
   130  				"condition", conditionalPath,
   131  			).Debug("ignoring path based on mountpoint filesystem type")
   132  
   133  			return fs.SkipDir
   134  		}
   135  	}
   136  
   137  	return nil
   138  }
   139  
   140  func containsPath(p1, p2 string) bool {
   141  	p1Clean := simpleClean(p1)
   142  	p2Clean := simpleClean(p2)
   143  	if p1Clean == p2Clean {
   144  		return true
   145  	}
   146  	if !strings.HasPrefix(p1Clean, p2Clean) {
   147  		return false
   148  	}
   149  	// This is done to avoid allocation of a new string
   150  	return len(p1Clean) > len(p2Clean) && p1Clean[len(p2Clean)] == '/'
   151  }
   152  
   153  func simpleClean(p string) string {
   154  	p = strings.TrimSpace(p)
   155  	if p == "" {
   156  		return "."
   157  	}
   158  	if p == "/" {
   159  		return ""
   160  	}
   161  	return strings.TrimSuffix(p, "/")
   162  }