github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/filesystem.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package filesystem provides the interface for inventory extraction plugins.
    16  package filesystem
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"io/fs"
    24  	"os"
    25  	"path/filepath"
    26  	"regexp"
    27  	"slices"
    28  	"strings"
    29  	"time"
    30  
    31  	"github.com/gobwas/glob"
    32  	"github.com/google/osv-scalibr/extractor"
    33  	"github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common"
    34  	"github.com/google/osv-scalibr/extractor/filesystem/internal"
    35  	scalibrfs "github.com/google/osv-scalibr/fs"
    36  	"github.com/google/osv-scalibr/inventory"
    37  	"github.com/google/osv-scalibr/log"
    38  	"github.com/google/osv-scalibr/plugin"
    39  	"github.com/google/osv-scalibr/stats"
    40  )
    41  
var (
	// ErrNotRelativeToScanRoots is returned when a file or directory that should be extracted or
	// skipped is not located under any of the scan roots.
	ErrNotRelativeToScanRoots = errors.New("path not relative to any of the scan roots")
	// ErrFailedToOpenFile is returned when opening a file fails.
	ErrFailedToOpenFile = errors.New("failed to open file")
)
    49  
// Extractor is the filesystem-based inventory extraction plugin, used to extract inventory data
// from the filesystem such as OS and language packages.
type Extractor interface {
	extractor.Extractor
	// FileRequired should return true if the file described by api (its path and, lazily, its
	// file info) is relevant for the extractor.
	// Note that the plugin doesn't traverse the filesystem itself but relies on the core
	// library for that.
	FileRequired(api FileAPI) bool
	// Extract extracts inventory data relevant for the extractor from a given file.
	Extract(ctx context.Context, input *ScanInput) (inventory.Inventory, error)
}
    62  
// FileAPI is the interface for accessing file information and path.
type FileAPI interface {
	// Stat returns the file info for the file.
	Stat() (fs.FileInfo, error)
	// Path returns the path of the file as seen by the filesystem walk.
	Path() string
}
    69  
// ScanInput describes one file to extract from.
type ScanInput struct {
	// FS for file access. This is rooted at Root.
	FS scalibrfs.FS
	// The path of the file to extract, relative to Root.
	Path string
	// The root directory where the extraction file walking started from.
	Root string
	// File info of the opened file. Left nil when the extractor is run on a directory.
	Info fs.FileInfo
	// A reader for accessing contents of the file. Left nil when the extractor is run on a
	// directory.
	// Note that the file is closed by the core library, not the plugin.
	Reader io.Reader
}
    83  
// Config stores the config settings for an extraction run.
type Config struct {
	// The extractors to run. If empty, Run returns an empty inventory right away.
	Extractors []Extractor
	// The filesystem roots to walk.
	ScanRoots  []*scalibrfs.ScanRoot
	// Optional: Individual files to extract inventory from. If specified, the
	// extractors will only look at these files during the filesystem traversal.
	// Note that these are not relative to the ScanRoots and thus need to be
	// sub-directories of one of the ScanRoots.
	PathsToExtract []string
	// Optional: If true, only the files in the top-level directories in PathsToExtract are
	// extracted and sub-directories are ignored.
	IgnoreSubDirs bool
	// Optional: Directories that the file system walk should ignore.
	// Note that these are not relative to the ScanRoots and thus need to be
	// sub-directories of one of the ScanRoots.
	// TODO(b/279413691): Also skip local paths, e.g. "Skip all .git dirs"
	DirsToSkip []string
	// Optional: If the regex matches a directory, it will be skipped.
	SkipDirRegex *regexp.Regexp
	// Optional: If the glob matches a directory, it will be skipped.
	SkipDirGlob glob.Glob
	// Optional: Skip files declared in .gitignore files in source repos.
	UseGitignore bool
	// Optional: Stats allows to enter a metric hook. If left nil, no metrics will be recorded.
	Stats stats.Collector
	// Optional: Whether to read symlinks.
	ReadSymlinks bool
	// Optional: Limit for visited inodes. If 0, no limit is applied.
	MaxInodes int
	// Optional: Files larger than this size in bytes are skipped. If 0, no limit is applied.
	MaxFileSize int
	// Optional: By default, inventories store a path relative to the scan root. If
	// StoreAbsolutePath is set, the absolute path is stored instead.
	StoreAbsolutePath bool
	// Optional: If true, print a detailed analysis of the duration of each extractor.
	PrintDurationAnalysis bool
	// Optional: If true, fail the scan if any filesystem error (permission errors included) is
	// encountered during the walk. Otherwise such errors are only logged.
	ErrorOnFSErrors bool
	// Optional: If set, this function is called for each file to check if there are specific
	// extractors for this file. If it returns a non-empty list, only those extractors are used
	// for the file and their FileRequired check is bypassed.
	ExtractorOverride func(FileAPI) []Extractor
}
   126  
   127  // Run runs the specified extractors and returns their extraction results,
   128  // as well as info about whether the plugin runs completed successfully.
   129  func Run(ctx context.Context, config *Config) (inventory.Inventory, []*plugin.Status, error) {
   130  	if len(config.Extractors) == 0 {
   131  		return inventory.Inventory{}, []*plugin.Status{}, nil
   132  	}
   133  
   134  	scanRoots, err := expandAllAbsolutePaths(config.ScanRoots)
   135  	if err != nil {
   136  		return inventory.Inventory{}, nil, err
   137  	}
   138  
   139  	wc, err := InitWalkContext(ctx, config, scanRoots)
   140  	if err != nil {
   141  		return inventory.Inventory{}, nil, err
   142  	}
   143  
   144  	var status []*plugin.Status
   145  	inv := inventory.Inventory{}
   146  	for _, root := range scanRoots {
   147  		newInv, st, err := runOnScanRoot(ctx, config, root, wc)
   148  		if err != nil {
   149  			return inv, nil, err
   150  		}
   151  
   152  		inv.Append(newInv)
   153  		status = append(status, st...)
   154  	}
   155  
   156  	return inv, status, nil
   157  }
   158  
   159  func runOnScanRoot(ctx context.Context, config *Config, scanRoot *scalibrfs.ScanRoot, wc *walkContext) (inventory.Inventory, []*plugin.Status, error) {
   160  	abs := ""
   161  	var err error
   162  	if !scanRoot.IsVirtual() {
   163  		abs, err = filepath.Abs(scanRoot.Path)
   164  		if err != nil {
   165  			return inventory.Inventory{}, nil, err
   166  		}
   167  	}
   168  	if err = wc.PrepareNewScan(abs, scanRoot.FS); err != nil {
   169  		return inventory.Inventory{}, nil, err
   170  	}
   171  
   172  	// Run extractors on the scan root
   173  	inv, status, err := RunFS(ctx, config, wc)
   174  	if err != nil {
   175  		return inv, status, err
   176  	}
   177  
   178  	// Process embedded filesystems
   179  	var additionalInv inventory.Inventory
   180  	for _, embeddedFS := range inv.EmbeddedFSs {
   181  		// Mount the embedded filesystem
   182  		mountedFS, err := embeddedFS.GetEmbeddedFS(ctx)
   183  		if err != nil {
   184  			status = append(status, &plugin.Status{
   185  				Name:    "EmbeddedFS",
   186  				Version: 1,
   187  				Status: &plugin.ScanStatus{
   188  					Status:        plugin.ScanStatusFailed,
   189  					FailureReason: fmt.Sprintf("failed to mount embedded filesystem %s: %v", embeddedFS.Path, err),
   190  				},
   191  			})
   192  			continue
   193  		}
   194  
   195  		// Create a new ScanRoot for the mounted filesystem
   196  		newScanRoot := &scalibrfs.ScanRoot{
   197  			FS:   mountedFS,
   198  			Path: "", // Virtual filesystem
   199  		}
   200  
   201  		// Reuse the existing config, updating only necessary fields
   202  		config.ScanRoots = []*scalibrfs.ScanRoot{newScanRoot}
   203  		// Clear PathsToExtract to scan entire mounted filesystem
   204  		config.PathsToExtract = []string{}
   205  
   206  		// Run extractors on the mounted filesystem using Run
   207  		mountedInv, mountedStatus, err := Run(ctx, config)
   208  		if err != nil {
   209  			status = append(status, &plugin.Status{
   210  				Name:    "EmbeddedFS",
   211  				Version: 1,
   212  				Status: &plugin.ScanStatus{
   213  					Status:        plugin.ScanStatusFailed,
   214  					FailureReason: fmt.Sprintf("failed to extract from embedded filesystem %s: %v", embeddedFS.Path, err),
   215  				},
   216  			})
   217  			continue
   218  		}
   219  
   220  		// Prepend embeddedFS.Path to Locations for all packages in mountedInv
   221  		for _, pkg := range mountedInv.Packages {
   222  			updatedLocations := make([]string, len(pkg.Locations))
   223  			for i, loc := range pkg.Locations {
   224  				updatedLocations[i] = fmt.Sprintf("%s:%s", embeddedFS.Path, loc)
   225  			}
   226  			pkg.Locations = updatedLocations
   227  		}
   228  
   229  		additionalInv.Append(mountedInv)
   230  		status = plugin.DedupeStatuses(slices.Concat(status, mountedStatus))
   231  
   232  		// Collect temporary directories and raw files after traversal for removal.
   233  		if c, ok := mountedFS.(common.CloserWithTmpPaths); ok {
   234  			embeddedFS.TempPaths = c.TempPaths()
   235  		}
   236  	}
   237  
   238  	// Combine inventories
   239  	inv.Append(additionalInv)
   240  	return inv, status, nil
   241  }
   242  
   243  // InitWalkContext initializes the walk context for a filesystem walk. It strips all the paths that
   244  // are expected to be relative to the scan root.
   245  // This function is exported for TESTS ONLY.
   246  func InitWalkContext(ctx context.Context, config *Config, absScanRoots []*scalibrfs.ScanRoot) (*walkContext, error) {
   247  	pathsToExtract, err := stripAllPathPrefixes(config.PathsToExtract, absScanRoots)
   248  	if err != nil {
   249  		return nil, err
   250  	}
   251  	pathsToExtract = toSlashPaths(pathsToExtract)
   252  
   253  	dirsToSkip, err := stripAllPathPrefixes(config.DirsToSkip, absScanRoots)
   254  	if err != nil {
   255  		return nil, err
   256  	}
   257  	dirsToSkip = toSlashPaths(dirsToSkip)
   258  
   259  	return &walkContext{
   260  		ctx:               ctx,
   261  		stats:             config.Stats,
   262  		extractors:        config.Extractors,
   263  		pathsToExtract:    pathsToExtract,
   264  		ignoreSubDirs:     config.IgnoreSubDirs,
   265  		dirsToSkip:        pathStringListToMap(dirsToSkip),
   266  		skipDirRegex:      config.SkipDirRegex,
   267  		skipDirGlob:       config.SkipDirGlob,
   268  		useGitignore:      config.UseGitignore,
   269  		readSymlinks:      config.ReadSymlinks,
   270  		maxInodes:         config.MaxInodes,
   271  		maxFileSize:       config.MaxFileSize,
   272  		inodesVisited:     0,
   273  		storeAbsolutePath: config.StoreAbsolutePath,
   274  		errorOnFSErrors:   config.ErrorOnFSErrors,
   275  		extractorOverride: config.ExtractorOverride,
   276  
   277  		lastStatus: time.Now(),
   278  
   279  		inventory: inventory.Inventory{},
   280  		errors:    make(map[string]map[string]error),
   281  		foundInv:  make(map[string]bool),
   282  
   283  		fileAPI: &lazyFileAPI{},
   284  	}, nil
   285  }
   286  
   287  // RunFS runs the specified extractors and returns their extraction results,
   288  // as well as info about whether the plugin runs completed successfully.
   289  // scanRoot is the location of fsys.
   290  // This method is for testing, use Run() to avoid confusion with scanRoot vs fsys.
   291  func RunFS(ctx context.Context, config *Config, wc *walkContext) (inventory.Inventory, []*plugin.Status, error) {
   292  	start := time.Now()
   293  	if wc == nil || wc.fs == nil {
   294  		return inventory.Inventory{}, nil, errors.New("walk context is nil")
   295  	}
   296  
   297  	var err error
   298  	log.Infof("Starting filesystem walk for root: %v", wc.scanRoot)
   299  	if len(wc.pathsToExtract) > 0 {
   300  		err = walkIndividualPaths(wc)
   301  	} else {
   302  		ticker := time.NewTicker(2 * time.Second)
   303  		quit := make(chan struct{})
   304  		go func() {
   305  			for {
   306  				select {
   307  				case <-ticker.C:
   308  					wc.printStatus()
   309  				case <-quit:
   310  					ticker.Stop()
   311  					return
   312  				}
   313  			}
   314  		}()
   315  
   316  		err = internal.WalkDirUnsorted(wc.fs, ".", wc.handleFile, wc.postHandleFile)
   317  
   318  		close(quit)
   319  	}
   320  
   321  	// On Windows, elapsed and wall time are probably the same. On Linux and Mac they are different,
   322  	// if Scalibr was suspended during runtime.
   323  	log.Infof("End status: %d dirs visited, %d inodes visited, %d Extract calls, %s elapsed, %s wall time",
   324  		wc.dirsVisited, wc.inodesVisited, wc.extractCalls, time.Since(start), time.Duration(time.Now().UnixNano()-start.UnixNano()))
   325  
   326  	return wc.inventory, errToExtractorStatus(config.Extractors, wc.foundInv, wc.errors), err
   327  }
   328  
// walkContext carries the settings and the mutable state of a filesystem walk. One instance is
// created per Run and re-targeted at each scan root through PrepareNewScan.
type walkContext struct {
	//nolint:containedctx
	ctx               context.Context
	stats             stats.Collector
	extractors        []Extractor
	fs                scalibrfs.FS // Filesystem currently being walked.
	scanRoot          string       // Root of fs; set by PrepareNewScan.
	pathsToExtract    []string
	ignoreSubDirs     bool
	dirsToSkip        map[string]bool // Anything under these paths should be skipped.
	skipDirRegex      *regexp.Regexp
	skipDirGlob       glob.Glob
	useGitignore      bool
	maxInodes         int
	inodesVisited     int
	maxFileSize       int // In bytes.
	dirsVisited       int
	storeAbsolutePath bool
	errorOnFSErrors   bool

	// applicable gitignore patterns for the current and parent directories.
	gitignores []internal.GitignorePattern
	// Inventories found.
	inventory inventory.Inventory
	// Extractor name to file path to runtime errors.
	errors map[string]map[string]error
	// Whether an extractor found any inventory.
	foundInv map[string]bool
	// Whether to read symlinks.
	readSymlinks bool

	// Data for status printing.
	lastStatus   time.Time
	lastInodes   int
	extractCalls int
	lastExtracts int

	// Path most recently passed to handleFile.
	currentPath string
	// Lazily-evaluated file API handed to the extractors; re-pointed at each visited path.
	fileAPI     *lazyFileAPI

	// If set, this function is called for each file to check if there is a specific
	// extractor for this file. If it returns an extractor, only that extractor is used for the file.
	extractorOverride func(FileAPI) []Extractor
}
   373  
   374  func walkIndividualPaths(wc *walkContext) error {
   375  	for _, p := range wc.pathsToExtract {
   376  		p := filepath.ToSlash(p)
   377  		info, err := fs.Stat(wc.fs, p)
   378  		if err != nil {
   379  			err = wc.handleFile(p, nil, err)
   380  		} else {
   381  			if info.IsDir() {
   382  				// Recursively scan the contents of the directory.
   383  				if wc.useGitignore {
   384  					// Parse parent dir .gitignore files up to the scan root.
   385  					gitignores, err := internal.ParseParentGitignores(wc.fs, p)
   386  					if err != nil {
   387  						return err
   388  					}
   389  					wc.gitignores = gitignores
   390  				}
   391  				err = internal.WalkDirUnsorted(wc.fs, p, wc.handleFile, wc.postHandleFile)
   392  				wc.gitignores = nil
   393  				if err != nil {
   394  					return err
   395  				}
   396  				continue
   397  			}
   398  			err = wc.handleFile(p, fs.FileInfoToDirEntry(info), nil)
   399  		}
   400  		if err != nil {
   401  			return err
   402  		}
   403  	}
   404  	return nil
   405  }
   406  
   407  func (wc *walkContext) handleFile(path string, d fs.DirEntry, fserr error) error {
   408  	wc.currentPath = path
   409  
   410  	wc.inodesVisited++
   411  	if wc.maxInodes > 0 && wc.inodesVisited > wc.maxInodes {
   412  		return fmt.Errorf("maxInodes (%d) exceeded", wc.maxInodes)
   413  	}
   414  
   415  	wc.stats.AfterInodeVisited(path)
   416  	if wc.ctx.Err() != nil {
   417  		return wc.ctx.Err()
   418  	}
   419  	if fserr != nil {
   420  		if wc.errorOnFSErrors {
   421  			return fmt.Errorf("handleFile(%q) fserr: %w", path, fserr)
   422  		}
   423  		if os.IsPermission(fserr) {
   424  			// Permission errors are expected when traversing the entire filesystem.
   425  			log.Debugf("fserr (permission error): %v", fserr)
   426  		} else {
   427  			log.Errorf("fserr (non-permission error): %v", fserr)
   428  		}
   429  		return nil
   430  	}
   431  
   432  	wc.fileAPI.currentPath = path
   433  	wc.fileAPI.currentStatCalled = false
   434  
   435  	if d.Type().IsDir() {
   436  		wc.dirsVisited++
   437  		if wc.useGitignore {
   438  			gitignores := internal.EmptyGitignore()
   439  			var err error
   440  			if !wc.shouldSkipDir(path) {
   441  				gitignores, err = internal.ParseDirForGitignore(wc.fs, path)
   442  				if err != nil {
   443  					return err
   444  				}
   445  			}
   446  			wc.gitignores = append(wc.gitignores, gitignores)
   447  		}
   448  
   449  		exts := wc.extractors
   450  		ignoreFileRequired := false
   451  		// Pass the path to the extractors that extract from directories.
   452  		if wc.extractorOverride != nil {
   453  			if overrideExts := wc.extractorOverride(wc.fileAPI); len(overrideExts) > 0 {
   454  				exts = overrideExts
   455  				ignoreFileRequired = true
   456  			}
   457  		}
   458  
   459  		for _, ex := range exts {
   460  			if ex.Requirements().ExtractFromDirs &&
   461  				(ignoreFileRequired || ex.FileRequired(wc.fileAPI)) {
   462  				wc.runExtractor(ex, path, true)
   463  			}
   464  		}
   465  
   466  		if wc.shouldSkipDir(path) { // Skip everything inside this dir.
   467  			return fs.SkipDir
   468  		}
   469  		return nil
   470  	}
   471  
   472  	// Ignore non regular files except symlinks.
   473  	if !d.Type().IsRegular() {
   474  		// Ignore the file because symlink reading is disabled.
   475  		if !wc.readSymlinks {
   476  			return nil
   477  		}
   478  		// Ignore non-symlinks.
   479  		if (d.Type() & fs.ModeType) != fs.ModeSymlink {
   480  			return nil
   481  		}
   482  	}
   483  
   484  	if wc.useGitignore {
   485  		if internal.GitignoreMatch(wc.gitignores, strings.Split(path, "/"), false) {
   486  			return nil
   487  		}
   488  	}
   489  
   490  	exts := wc.extractors
   491  	ignoreFileRequired := false
   492  	// Pass the path to the extractors that extract from directories.
   493  	if wc.extractorOverride != nil {
   494  		if overrideExts := wc.extractorOverride(wc.fileAPI); len(overrideExts) > 0 {
   495  			exts = overrideExts
   496  			ignoreFileRequired = true
   497  		}
   498  	}
   499  
   500  	fSize := int64(-1) // -1 means we haven't checked the file size yet.
   501  	for _, ex := range exts {
   502  		if !ex.Requirements().ExtractFromDirs &&
   503  			(ignoreFileRequired || ex.FileRequired(wc.fileAPI)) {
   504  			if wc.maxFileSize > 0 && fSize == -1 {
   505  				var err error
   506  				fSize, err = fileSize(wc.fileAPI)
   507  				if err != nil {
   508  					return fmt.Errorf("failed to get file size for %q: %w", path, err)
   509  				}
   510  				if fSize > int64(wc.maxFileSize) {
   511  					log.Debugf("Skipping file %q because it has size %d bytes and the maximum is %d bytes", path, fSize, wc.maxFileSize)
   512  					return nil
   513  				}
   514  			}
   515  
   516  			wc.runExtractor(ex, path, false)
   517  		}
   518  	}
   519  	return nil
   520  }
   521  
   522  func (wc *walkContext) postHandleFile(path string, d fs.DirEntry) {
   523  	if len(wc.gitignores) > 0 && d.Type().IsDir() {
   524  		// Remove .gitignores that applied to this directory.
   525  		wc.gitignores = wc.gitignores[:len(wc.gitignores)-1]
   526  	}
   527  }
   528  
// lazyFileAPI is a FileAPI implementation that only stats the current file when Stat is first
// called and caches the outcome until currentStatCalled is reset.
type lazyFileAPI struct {
	// Filesystem the current path is resolved against.
	fs                scalibrfs.FS
	// Path of the file currently being handled.
	currentPath       string
	// Cached result of the last Stat call; only meaningful if currentStatCalled is true.
	currentFileInfo   fs.FileInfo
	currentStatErr    error
	// Whether the cached Stat result belongs to currentPath.
	currentStatCalled bool
}
   536  
// Path returns the path of the file the API currently points at.
func (api *lazyFileAPI) Path() string {
	return api.currentPath
}
   540  func (api *lazyFileAPI) Stat() (fs.FileInfo, error) {
   541  	if !api.currentStatCalled {
   542  		api.currentStatCalled = true
   543  		api.currentFileInfo, api.currentStatErr = fs.Stat(api.fs, api.currentPath)
   544  	}
   545  	return api.currentFileInfo, api.currentStatErr
   546  }
   547  
   548  func (wc *walkContext) shouldSkipDir(path string) bool {
   549  	if _, ok := wc.dirsToSkip[path]; ok {
   550  		return true
   551  	}
   552  	if wc.ignoreSubDirs && !slices.Contains(wc.pathsToExtract, path) {
   553  		// Skip dirs that aren't one of the root dirs.
   554  		return true
   555  	}
   556  	if wc.useGitignore && internal.GitignoreMatch(wc.gitignores, strings.Split(path, "/"), true) {
   557  		return true
   558  	}
   559  	if wc.skipDirRegex != nil {
   560  		return wc.skipDirRegex.MatchString(path)
   561  	}
   562  	if wc.skipDirGlob != nil {
   563  		return wc.skipDirGlob.Match(path)
   564  	}
   565  	return false
   566  }
   567  
   568  func (wc *walkContext) runExtractor(ex Extractor, path string, isDir bool) {
   569  	var rc fs.File
   570  	var info fs.FileInfo
   571  	var err error
   572  	if !isDir {
   573  		rc, err = wc.fs.Open(path)
   574  		if err != nil {
   575  			addErrToMap(wc.errors, ex.Name(), path, fmt.Errorf("Open(%s): %w", path, err))
   576  			return
   577  		}
   578  		defer rc.Close()
   579  
   580  		info, err = rc.Stat()
   581  		if err != nil {
   582  			addErrToMap(wc.errors, ex.Name(), path, fmt.Errorf("stat(%s): %w", path, err))
   583  			return
   584  		}
   585  	}
   586  
   587  	wc.extractCalls++
   588  
   589  	start := time.Now()
   590  	results, err := ex.Extract(wc.ctx, &ScanInput{
   591  		FS:     wc.fs,
   592  		Path:   path,
   593  		Root:   wc.scanRoot,
   594  		Info:   info,
   595  		Reader: rc,
   596  	})
   597  	wc.stats.AfterExtractorRun(ex.Name(), &stats.AfterExtractorStats{
   598  		Path:      path,
   599  		Root:      wc.scanRoot,
   600  		Runtime:   time.Since(start),
   601  		Inventory: &results,
   602  		Error:     err,
   603  	})
   604  
   605  	if err != nil {
   606  		addErrToMap(wc.errors, ex.Name(), path, err)
   607  	}
   608  
   609  	if !results.IsEmpty() {
   610  		wc.foundInv[ex.Name()] = true
   611  		for _, r := range results.Packages {
   612  			r.Plugins = append(r.Plugins, ex.Name())
   613  			if wc.storeAbsolutePath {
   614  				r.Locations = expandAbsolutePath(wc.scanRoot, r.Locations)
   615  			}
   616  		}
   617  		wc.inventory.Append(results)
   618  	}
   619  }
   620  
// PrepareNewScan updates the scan root and the filesystem to use for the next filesystem walk.
// It also resets the inventory. Other walk state, such as the recorded errors and the inode
// counters, is left untouched.
// absRoot is expected to be an absolute path; it is left empty for virtual scan roots.
// The returned error is currently always nil.
func (wc *walkContext) PrepareNewScan(absRoot string, fs scalibrfs.FS) error {
	wc.scanRoot = absRoot
	wc.fs = fs
	// The lazy file API stats files on the same filesystem as the walk.
	wc.fileAPI.fs = fs
	wc.inventory = inventory.Inventory{}
	return nil
}
   631  
   632  func expandAbsolutePath(scanRoot string, paths []string) []string {
   633  	var locations []string
   634  	for _, l := range paths {
   635  		locations = append(locations, filepath.Join(scanRoot, l))
   636  	}
   637  	return locations
   638  }
   639  
   640  func expandAllAbsolutePaths(scanRoots []*scalibrfs.ScanRoot) ([]*scalibrfs.ScanRoot, error) {
   641  	var result []*scalibrfs.ScanRoot
   642  	for _, r := range scanRoots {
   643  		abs, err := r.WithAbsolutePath()
   644  		if err != nil {
   645  			return nil, err
   646  		}
   647  		result = append(result, abs)
   648  	}
   649  
   650  	return result, nil
   651  }
   652  
   653  func stripAllPathPrefixes(paths []string, scanRoots []*scalibrfs.ScanRoot) ([]string, error) {
   654  	if len(scanRoots) > 0 && scanRoots[0].IsVirtual() {
   655  		// We're using a virtual filesystem with no real absolute paths.
   656  		return paths, nil
   657  	}
   658  	result := make([]string, 0, len(paths))
   659  	for _, p := range paths {
   660  		abs, err := filepath.Abs(p)
   661  		if err != nil {
   662  			return nil, err
   663  		}
   664  
   665  		rp, err := stripFromAtLeastOnePrefix(abs, scanRoots)
   666  		if err != nil {
   667  			return nil, err
   668  		}
   669  		result = append(result, rp)
   670  	}
   671  
   672  	return result, nil
   673  }
   674  
   675  // toSlashPaths returns a new []string that converts all paths to use /
   676  func toSlashPaths(paths []string) []string {
   677  	returnPaths := make([]string, len(paths))
   678  	for i, s := range paths {
   679  		returnPaths[i] = filepath.ToSlash(s)
   680  	}
   681  
   682  	return returnPaths
   683  }
   684  
   685  // stripFromAtLeastOnePrefix returns the path relative to the first prefix it is relative to.
   686  // If the path is not relative to any of the prefixes, an error is returned.
   687  // The path is expected to be absolute.
   688  func stripFromAtLeastOnePrefix(path string, scanRoots []*scalibrfs.ScanRoot) (string, error) {
   689  	for _, r := range scanRoots {
   690  		if !strings.HasPrefix(path, r.Path) {
   691  			continue
   692  		}
   693  		rel, err := filepath.Rel(r.Path, path)
   694  		if err != nil {
   695  			return "", err
   696  		}
   697  
   698  		return rel, nil
   699  	}
   700  
   701  	return "", ErrNotRelativeToScanRoots
   702  }
   703  
   704  func pathStringListToMap(paths []string) map[string]bool {
   705  	result := make(map[string]bool)
   706  	for _, p := range paths {
   707  		result[p] = true
   708  	}
   709  	return result
   710  }
   711  
   712  func addErrToMap(errors map[string]map[string]error, extractor string, path string, err error) {
   713  	if _, ok := errors[extractor]; !ok {
   714  		errors[extractor] = make(map[string]error)
   715  	}
   716  	errors[extractor][path] = err
   717  }
   718  
   719  func errToExtractorStatus(extractors []Extractor, foundInv map[string]bool, errs map[string]map[string]error) []*plugin.Status {
   720  	result := make([]*plugin.Status, 0, len(extractors))
   721  	for _, ex := range extractors {
   722  		fileErrs := createFileErrorsForPlugin(errs[ex.Name()])
   723  		result = append(result, plugin.StatusFromErr(ex, foundInv[ex.Name()], plugin.OverallErrFromFileErrs(fileErrs), fileErrs))
   724  	}
   725  	return result
   726  }
   727  
   728  func createFileErrorsForPlugin(errorMap map[string]error) []*plugin.FileError {
   729  	if len(errorMap) == 0 {
   730  		return nil
   731  	}
   732  
   733  	var fileErrors []*plugin.FileError
   734  	for path, err := range errorMap {
   735  		fileErrors = append(fileErrors, &plugin.FileError{
   736  			FilePath:     path,
   737  			ErrorMessage: err.Error(),
   738  		})
   739  	}
   740  	return fileErrors
   741  }
   742  
   743  func (wc *walkContext) printStatus() {
   744  	log.Infof("Status: new inodes: %d, %.1f inodes/s, new extract calls: %d, path: %q\n",
   745  		wc.inodesVisited-wc.lastInodes,
   746  		float64(wc.inodesVisited-wc.lastInodes)/time.Since(wc.lastStatus).Seconds(),
   747  		wc.extractCalls-wc.lastExtracts, wc.currentPath)
   748  
   749  	wc.lastStatus = time.Now()
   750  	wc.lastInodes = wc.inodesVisited
   751  	wc.lastExtracts = wc.extractCalls
   752  }
   753  
   754  // GetRealPath returns the real absolute path of the file on the scanning host's filesystem.
   755  // If the file is on a virtual filesystem (e.g. a remote container), it is first copied into a
   756  // temporary directory on the scanning host's filesystem. It's up to the caller to delete the
   757  // directory once they're done using it.
   758  func (i *ScanInput) GetRealPath() (string, error) {
   759  	return scalibrfs.GetRealPath(&scalibrfs.ScanRoot{FS: i.FS, Path: i.Root}, i.Path, i.Reader)
   760  }
   761  
   762  // TODO(b/380419487): This list is not exhaustive. We should add more extensions here.
   763  var (
   764  	unlikelyExecutableExtensions = map[string]bool{
   765  		".c":             true,
   766  		".cc":            true,
   767  		".cargo-ok":      true,
   768  		".crate":         true,
   769  		".css":           true,
   770  		".db":            true,
   771  		".gitattributes": true,
   772  		".gitignore":     true,
   773  		".go":            true,
   774  		".h":             true,
   775  		".html":          true,
   776  		".jpg":           true,
   777  		".json":          true,
   778  		".lock":          true,
   779  		".log":           true,
   780  		".md":            true,
   781  		".mod":           true,
   782  		".png":           true,
   783  		".proto":         true,
   784  		".rs":            true,
   785  		".stderr":        true,
   786  		".sum":           true,
   787  		".svg":           true,
   788  		".tar":           true,
   789  		".tmpl":          true,
   790  		".toml":          true,
   791  		".txt":           true,
   792  		".woff2":         true,
   793  		".xml":           true,
   794  		".yaml":          true,
   795  		".yml":           true,
   796  		".zip":           true,
   797  		".ziphash":       true,
   798  	}
   799  
   800  	// Always interesting binary extensions
   801  	likelyFileExts = map[string]bool{
   802  		".a": true,
   803  		// Binary extensions
   804  		".bin": true,
   805  		".elf": true,
   806  		".run": true,
   807  		".o":   true,
   808  		// Windows Binary extensions:
   809  		".exe": true,
   810  		".dll": true,
   811  
   812  		// Shared library: true extension: true
   813  		".so": true,
   814  		// and .so: true.[number]
   815  
   816  		// Script extensions: true
   817  		".py":   true, // Python
   818  		".sh":   true, // bash/sh/zsh
   819  		".bash": true,
   820  
   821  		".pl":  true, // Perl
   822  		".rb":  true, // Ruby
   823  		".php": true, // Php
   824  		".awk": true, // Awk
   825  		".tcl": true, // tcl
   826  	}
   827  	likelyFileExtRegexes = map[string]*regexp.Regexp{
   828  		".so.": regexp.MustCompile(`.so.\d+$`),
   829  	}
   830  )
   831  
   832  // IsInterestingExecutable returns true if the specified file is an executable which may need scanning.
   833  func IsInterestingExecutable(api FileAPI) bool {
   834  	path := api.Path()
   835  	extension := filepath.Ext(path)
   836  	if unlikelyExecutableExtensions[extension] {
   837  		return false
   838  	}
   839  
   840  	if likelyFileExts[extension] {
   841  		return true
   842  	}
   843  
   844  	for substrTest, regex := range likelyFileExtRegexes {
   845  		if strings.Contains(path, substrTest) && regex.MatchString(path) {
   846  			return true
   847  		}
   848  	}
   849  
   850  	mode, err := api.Stat()
   851  	return err == nil && mode.Mode()&0111 != 0
   852  }
   853  
   854  func fileSize(file FileAPI) (int64, error) {
   855  	info, err := file.Stat()
   856  	if err != nil {
   857  		return 0, err
   858  	}
   859  	return info.Size(), nil
   860  }