// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package filesystem provides the interface for inventory extraction plugins.
package filesystem

import (
	"context"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"regexp"
	"slices"
	"strings"
	"time"

	"github.com/gobwas/glob"
	"github.com/google/osv-scalibr/extractor"
	"github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common"
	"github.com/google/osv-scalibr/extractor/filesystem/internal"
	scalibrfs "github.com/google/osv-scalibr/fs"
	"github.com/google/osv-scalibr/inventory"
	"github.com/google/osv-scalibr/log"
	"github.com/google/osv-scalibr/plugin"
	"github.com/google/osv-scalibr/stats"
)

var (
	// ErrNotRelativeToScanRoots is returned when one of the files or directories to be
	// retrieved or skipped is not relative to any of the scan roots.
	ErrNotRelativeToScanRoots = errors.New("path not relative to any of the scan roots")
	// ErrFailedToOpenFile is returned when opening a file fails.
	ErrFailedToOpenFile = errors.New("failed to open file")
)

// Extractor is the filesystem-based inventory extraction plugin, used to extract inventory data
// from the filesystem such as OS and language packages.
type Extractor interface {
	extractor.Extractor
	// FileRequired should return true if the file described by path and file info is
	// relevant for the extractor.
	// Note that the plugin doesn't traverse the filesystem itself but relies on the core
	// library for that.
	FileRequired(api FileAPI) bool
	// Extract extracts inventory data relevant for the extractor from a given file.
	Extract(ctx context.Context, input *ScanInput) (inventory.Inventory, error)
}

// FileAPI is the interface for accessing file information and path.
type FileAPI interface {
	// Stat returns the file info for the file.
	Stat() (fs.FileInfo, error)
	// Path returns the path of the file.
	Path() string
}

// ScanInput describes one file to extract from.
type ScanInput struct {
	// FS for file access. This is rooted at Root.
	FS scalibrfs.FS
	// The path of the file to extract, relative to Root.
	Path string
	// The root directory where the extraction file walking started from.
	Root string
	// File info of the opened file. Left unset when the extractor is run on a directory.
	Info fs.FileInfo
	// A reader for accessing contents of the file.
	// Note that the file is closed by the core library, not the plugin.
	// Left unset when the extractor is run on a directory.
	Reader io.Reader
}

// Config stores the config settings for an extraction run.
type Config struct {
	// The extractors to run. Run returns immediately with an empty result if this is empty.
	Extractors []Extractor
	// The roots of the filesystems to walk.
	ScanRoots []*scalibrfs.ScanRoot
	// Optional: Individual files to extract inventory from. If specified, the
	// extractors will only look at these files during the filesystem traversal.
	// Note that these are not relative to the ScanRoots and thus need to be
	// sub-directories of one of the ScanRoots.
	PathsToExtract []string
	// Optional: If true, only the files in the top-level directories in PathsToExtract are
	// extracted and sub-directories are ignored.
	IgnoreSubDirs bool
	// Optional: Directories that the file system walk should ignore.
	// Note that these are not relative to the ScanRoots and thus need to be
	// sub-directories of one of the ScanRoots.
	// TODO(b/279413691): Also skip local paths, e.g. "Skip all .git dirs"
	DirsToSkip []string
	// Optional: If the regex matches a directory, it will be skipped.
	SkipDirRegex *regexp.Regexp
	// Optional: If the glob matches a directory, it will be skipped.
	SkipDirGlob glob.Glob
	// Optional: Skip files declared in .gitignore files in source repos.
	UseGitignore bool
	// Optional: stats allows to enter a metric hook. If left nil, no metrics will be recorded.
	Stats stats.Collector
	// Optional: Whether to read symlinks.
	ReadSymlinks bool
	// Optional: Limit for visited inodes. If 0, no limit is applied.
	MaxInodes int
	// Optional: Files larger than this size in bytes are skipped. If 0, no limit is applied.
	MaxFileSize int
	// Optional: By default, inventories store a path relative to the scan root. If
	// StoreAbsolutePath is set, the absolute path is stored instead.
	StoreAbsolutePath bool
	// Optional: If true, print a detailed analysis of the duration of each extractor.
	PrintDurationAnalysis bool
	// Optional: If true, fail the scan as soon as the filesystem walk reports an error for a
	// path (permission errors included). Otherwise such errors are only logged.
	ErrorOnFSErrors bool
	// Optional: If set, this function is called for each file to check whether specific
	// extractors should handle it. If it returns a non-empty list, only those extractors are
	// used for the file and their FileRequired check is bypassed.
	ExtractorOverride func(FileAPI) []Extractor
}

// Run runs the specified extractors and returns their extraction results,
// as well as info about whether the plugin runs completed successfully.
129 func Run(ctx context.Context, config *Config) (inventory.Inventory, []*plugin.Status, error) { 130 if len(config.Extractors) == 0 { 131 return inventory.Inventory{}, []*plugin.Status{}, nil 132 } 133 134 scanRoots, err := expandAllAbsolutePaths(config.ScanRoots) 135 if err != nil { 136 return inventory.Inventory{}, nil, err 137 } 138 139 wc, err := InitWalkContext(ctx, config, scanRoots) 140 if err != nil { 141 return inventory.Inventory{}, nil, err 142 } 143 144 var status []*plugin.Status 145 inv := inventory.Inventory{} 146 for _, root := range scanRoots { 147 newInv, st, err := runOnScanRoot(ctx, config, root, wc) 148 if err != nil { 149 return inv, nil, err 150 } 151 152 inv.Append(newInv) 153 status = append(status, st...) 154 } 155 156 return inv, status, nil 157 } 158 159 func runOnScanRoot(ctx context.Context, config *Config, scanRoot *scalibrfs.ScanRoot, wc *walkContext) (inventory.Inventory, []*plugin.Status, error) { 160 abs := "" 161 var err error 162 if !scanRoot.IsVirtual() { 163 abs, err = filepath.Abs(scanRoot.Path) 164 if err != nil { 165 return inventory.Inventory{}, nil, err 166 } 167 } 168 if err = wc.PrepareNewScan(abs, scanRoot.FS); err != nil { 169 return inventory.Inventory{}, nil, err 170 } 171 172 // Run extractors on the scan root 173 inv, status, err := RunFS(ctx, config, wc) 174 if err != nil { 175 return inv, status, err 176 } 177 178 // Process embedded filesystems 179 var additionalInv inventory.Inventory 180 for _, embeddedFS := range inv.EmbeddedFSs { 181 // Mount the embedded filesystem 182 mountedFS, err := embeddedFS.GetEmbeddedFS(ctx) 183 if err != nil { 184 status = append(status, &plugin.Status{ 185 Name: "EmbeddedFS", 186 Version: 1, 187 Status: &plugin.ScanStatus{ 188 Status: plugin.ScanStatusFailed, 189 FailureReason: fmt.Sprintf("failed to mount embedded filesystem %s: %v", embeddedFS.Path, err), 190 }, 191 }) 192 continue 193 } 194 195 // Create a new ScanRoot for the mounted filesystem 196 newScanRoot := 
&scalibrfs.ScanRoot{ 197 FS: mountedFS, 198 Path: "", // Virtual filesystem 199 } 200 201 // Reuse the existing config, updating only necessary fields 202 config.ScanRoots = []*scalibrfs.ScanRoot{newScanRoot} 203 // Clear PathsToExtract to scan entire mounted filesystem 204 config.PathsToExtract = []string{} 205 206 // Run extractors on the mounted filesystem using Run 207 mountedInv, mountedStatus, err := Run(ctx, config) 208 if err != nil { 209 status = append(status, &plugin.Status{ 210 Name: "EmbeddedFS", 211 Version: 1, 212 Status: &plugin.ScanStatus{ 213 Status: plugin.ScanStatusFailed, 214 FailureReason: fmt.Sprintf("failed to extract from embedded filesystem %s: %v", embeddedFS.Path, err), 215 }, 216 }) 217 continue 218 } 219 220 // Prepend embeddedFS.Path to Locations for all packages in mountedInv 221 for _, pkg := range mountedInv.Packages { 222 updatedLocations := make([]string, len(pkg.Locations)) 223 for i, loc := range pkg.Locations { 224 updatedLocations[i] = fmt.Sprintf("%s:%s", embeddedFS.Path, loc) 225 } 226 pkg.Locations = updatedLocations 227 } 228 229 additionalInv.Append(mountedInv) 230 status = plugin.DedupeStatuses(slices.Concat(status, mountedStatus)) 231 232 // Collect temporary directories and raw files after traversal for removal. 233 if c, ok := mountedFS.(common.CloserWithTmpPaths); ok { 234 embeddedFS.TempPaths = c.TempPaths() 235 } 236 } 237 238 // Combine inventories 239 inv.Append(additionalInv) 240 return inv, status, nil 241 } 242 243 // InitWalkContext initializes the walk context for a filesystem walk. It strips all the paths that 244 // are expected to be relative to the scan root. 245 // This function is exported for TESTS ONLY. 
246 func InitWalkContext(ctx context.Context, config *Config, absScanRoots []*scalibrfs.ScanRoot) (*walkContext, error) { 247 pathsToExtract, err := stripAllPathPrefixes(config.PathsToExtract, absScanRoots) 248 if err != nil { 249 return nil, err 250 } 251 pathsToExtract = toSlashPaths(pathsToExtract) 252 253 dirsToSkip, err := stripAllPathPrefixes(config.DirsToSkip, absScanRoots) 254 if err != nil { 255 return nil, err 256 } 257 dirsToSkip = toSlashPaths(dirsToSkip) 258 259 return &walkContext{ 260 ctx: ctx, 261 stats: config.Stats, 262 extractors: config.Extractors, 263 pathsToExtract: pathsToExtract, 264 ignoreSubDirs: config.IgnoreSubDirs, 265 dirsToSkip: pathStringListToMap(dirsToSkip), 266 skipDirRegex: config.SkipDirRegex, 267 skipDirGlob: config.SkipDirGlob, 268 useGitignore: config.UseGitignore, 269 readSymlinks: config.ReadSymlinks, 270 maxInodes: config.MaxInodes, 271 maxFileSize: config.MaxFileSize, 272 inodesVisited: 0, 273 storeAbsolutePath: config.StoreAbsolutePath, 274 errorOnFSErrors: config.ErrorOnFSErrors, 275 extractorOverride: config.ExtractorOverride, 276 277 lastStatus: time.Now(), 278 279 inventory: inventory.Inventory{}, 280 errors: make(map[string]map[string]error), 281 foundInv: make(map[string]bool), 282 283 fileAPI: &lazyFileAPI{}, 284 }, nil 285 } 286 287 // RunFS runs the specified extractors and returns their extraction results, 288 // as well as info about whether the plugin runs completed successfully. 289 // scanRoot is the location of fsys. 290 // This method is for testing, use Run() to avoid confusion with scanRoot vs fsys. 
291 func RunFS(ctx context.Context, config *Config, wc *walkContext) (inventory.Inventory, []*plugin.Status, error) { 292 start := time.Now() 293 if wc == nil || wc.fs == nil { 294 return inventory.Inventory{}, nil, errors.New("walk context is nil") 295 } 296 297 var err error 298 log.Infof("Starting filesystem walk for root: %v", wc.scanRoot) 299 if len(wc.pathsToExtract) > 0 { 300 err = walkIndividualPaths(wc) 301 } else { 302 ticker := time.NewTicker(2 * time.Second) 303 quit := make(chan struct{}) 304 go func() { 305 for { 306 select { 307 case <-ticker.C: 308 wc.printStatus() 309 case <-quit: 310 ticker.Stop() 311 return 312 } 313 } 314 }() 315 316 err = internal.WalkDirUnsorted(wc.fs, ".", wc.handleFile, wc.postHandleFile) 317 318 close(quit) 319 } 320 321 // On Windows, elapsed and wall time are probably the same. On Linux and Mac they are different, 322 // if Scalibr was suspended during runtime. 323 log.Infof("End status: %d dirs visited, %d inodes visited, %d Extract calls, %s elapsed, %s wall time", 324 wc.dirsVisited, wc.inodesVisited, wc.extractCalls, time.Since(start), time.Duration(time.Now().UnixNano()-start.UnixNano())) 325 326 return wc.inventory, errToExtractorStatus(config.Extractors, wc.foundInv, wc.errors), err 327 } 328 329 type walkContext struct { 330 //nolint:containedctx 331 ctx context.Context 332 stats stats.Collector 333 extractors []Extractor 334 fs scalibrfs.FS 335 scanRoot string 336 pathsToExtract []string 337 ignoreSubDirs bool 338 dirsToSkip map[string]bool // Anything under these paths should be skipped. 339 skipDirRegex *regexp.Regexp 340 skipDirGlob glob.Glob 341 useGitignore bool 342 maxInodes int 343 inodesVisited int 344 maxFileSize int // In bytes. 345 dirsVisited int 346 storeAbsolutePath bool 347 errorOnFSErrors bool 348 349 // applicable gitignore patterns for the current and parent directories. 350 gitignores []internal.GitignorePattern 351 // Inventories found. 
352 inventory inventory.Inventory 353 // Extractor name to file path to runtime errors. 354 errors map[string]map[string]error 355 // Whether an extractor found any inventory. 356 foundInv map[string]bool 357 // Whether to read symlinks. 358 readSymlinks bool 359 360 // Data for status printing. 361 lastStatus time.Time 362 lastInodes int 363 extractCalls int 364 lastExtracts int 365 366 currentPath string 367 fileAPI *lazyFileAPI 368 369 // If set, this function is called for each file to check if there is a specific 370 // extractor for this file. If it returns an extractor, only that extractor is used for the file. 371 extractorOverride func(FileAPI) []Extractor 372 } 373 374 func walkIndividualPaths(wc *walkContext) error { 375 for _, p := range wc.pathsToExtract { 376 p := filepath.ToSlash(p) 377 info, err := fs.Stat(wc.fs, p) 378 if err != nil { 379 err = wc.handleFile(p, nil, err) 380 } else { 381 if info.IsDir() { 382 // Recursively scan the contents of the directory. 383 if wc.useGitignore { 384 // Parse parent dir .gitignore files up to the scan root. 
385 gitignores, err := internal.ParseParentGitignores(wc.fs, p) 386 if err != nil { 387 return err 388 } 389 wc.gitignores = gitignores 390 } 391 err = internal.WalkDirUnsorted(wc.fs, p, wc.handleFile, wc.postHandleFile) 392 wc.gitignores = nil 393 if err != nil { 394 return err 395 } 396 continue 397 } 398 err = wc.handleFile(p, fs.FileInfoToDirEntry(info), nil) 399 } 400 if err != nil { 401 return err 402 } 403 } 404 return nil 405 } 406 407 func (wc *walkContext) handleFile(path string, d fs.DirEntry, fserr error) error { 408 wc.currentPath = path 409 410 wc.inodesVisited++ 411 if wc.maxInodes > 0 && wc.inodesVisited > wc.maxInodes { 412 return fmt.Errorf("maxInodes (%d) exceeded", wc.maxInodes) 413 } 414 415 wc.stats.AfterInodeVisited(path) 416 if wc.ctx.Err() != nil { 417 return wc.ctx.Err() 418 } 419 if fserr != nil { 420 if wc.errorOnFSErrors { 421 return fmt.Errorf("handleFile(%q) fserr: %w", path, fserr) 422 } 423 if os.IsPermission(fserr) { 424 // Permission errors are expected when traversing the entire filesystem. 425 log.Debugf("fserr (permission error): %v", fserr) 426 } else { 427 log.Errorf("fserr (non-permission error): %v", fserr) 428 } 429 return nil 430 } 431 432 wc.fileAPI.currentPath = path 433 wc.fileAPI.currentStatCalled = false 434 435 if d.Type().IsDir() { 436 wc.dirsVisited++ 437 if wc.useGitignore { 438 gitignores := internal.EmptyGitignore() 439 var err error 440 if !wc.shouldSkipDir(path) { 441 gitignores, err = internal.ParseDirForGitignore(wc.fs, path) 442 if err != nil { 443 return err 444 } 445 } 446 wc.gitignores = append(wc.gitignores, gitignores) 447 } 448 449 exts := wc.extractors 450 ignoreFileRequired := false 451 // Pass the path to the extractors that extract from directories. 
452 if wc.extractorOverride != nil { 453 if overrideExts := wc.extractorOverride(wc.fileAPI); len(overrideExts) > 0 { 454 exts = overrideExts 455 ignoreFileRequired = true 456 } 457 } 458 459 for _, ex := range exts { 460 if ex.Requirements().ExtractFromDirs && 461 (ignoreFileRequired || ex.FileRequired(wc.fileAPI)) { 462 wc.runExtractor(ex, path, true) 463 } 464 } 465 466 if wc.shouldSkipDir(path) { // Skip everything inside this dir. 467 return fs.SkipDir 468 } 469 return nil 470 } 471 472 // Ignore non regular files except symlinks. 473 if !d.Type().IsRegular() { 474 // Ignore the file because symlink reading is disabled. 475 if !wc.readSymlinks { 476 return nil 477 } 478 // Ignore non-symlinks. 479 if (d.Type() & fs.ModeType) != fs.ModeSymlink { 480 return nil 481 } 482 } 483 484 if wc.useGitignore { 485 if internal.GitignoreMatch(wc.gitignores, strings.Split(path, "/"), false) { 486 return nil 487 } 488 } 489 490 exts := wc.extractors 491 ignoreFileRequired := false 492 // Pass the path to the extractors that extract from directories. 493 if wc.extractorOverride != nil { 494 if overrideExts := wc.extractorOverride(wc.fileAPI); len(overrideExts) > 0 { 495 exts = overrideExts 496 ignoreFileRequired = true 497 } 498 } 499 500 fSize := int64(-1) // -1 means we haven't checked the file size yet. 
501 for _, ex := range exts { 502 if !ex.Requirements().ExtractFromDirs && 503 (ignoreFileRequired || ex.FileRequired(wc.fileAPI)) { 504 if wc.maxFileSize > 0 && fSize == -1 { 505 var err error 506 fSize, err = fileSize(wc.fileAPI) 507 if err != nil { 508 return fmt.Errorf("failed to get file size for %q: %w", path, err) 509 } 510 if fSize > int64(wc.maxFileSize) { 511 log.Debugf("Skipping file %q because it has size %d bytes and the maximum is %d bytes", path, fSize, wc.maxFileSize) 512 return nil 513 } 514 } 515 516 wc.runExtractor(ex, path, false) 517 } 518 } 519 return nil 520 } 521 522 func (wc *walkContext) postHandleFile(path string, d fs.DirEntry) { 523 if len(wc.gitignores) > 0 && d.Type().IsDir() { 524 // Remove .gitignores that applied to this directory. 525 wc.gitignores = wc.gitignores[:len(wc.gitignores)-1] 526 } 527 } 528 529 type lazyFileAPI struct { 530 fs scalibrfs.FS 531 currentPath string 532 currentFileInfo fs.FileInfo 533 currentStatErr error 534 currentStatCalled bool 535 } 536 537 func (api *lazyFileAPI) Path() string { 538 return api.currentPath 539 } 540 func (api *lazyFileAPI) Stat() (fs.FileInfo, error) { 541 if !api.currentStatCalled { 542 api.currentStatCalled = true 543 api.currentFileInfo, api.currentStatErr = fs.Stat(api.fs, api.currentPath) 544 } 545 return api.currentFileInfo, api.currentStatErr 546 } 547 548 func (wc *walkContext) shouldSkipDir(path string) bool { 549 if _, ok := wc.dirsToSkip[path]; ok { 550 return true 551 } 552 if wc.ignoreSubDirs && !slices.Contains(wc.pathsToExtract, path) { 553 // Skip dirs that aren't one of the root dirs. 
554 return true 555 } 556 if wc.useGitignore && internal.GitignoreMatch(wc.gitignores, strings.Split(path, "/"), true) { 557 return true 558 } 559 if wc.skipDirRegex != nil { 560 return wc.skipDirRegex.MatchString(path) 561 } 562 if wc.skipDirGlob != nil { 563 return wc.skipDirGlob.Match(path) 564 } 565 return false 566 } 567 568 func (wc *walkContext) runExtractor(ex Extractor, path string, isDir bool) { 569 var rc fs.File 570 var info fs.FileInfo 571 var err error 572 if !isDir { 573 rc, err = wc.fs.Open(path) 574 if err != nil { 575 addErrToMap(wc.errors, ex.Name(), path, fmt.Errorf("Open(%s): %w", path, err)) 576 return 577 } 578 defer rc.Close() 579 580 info, err = rc.Stat() 581 if err != nil { 582 addErrToMap(wc.errors, ex.Name(), path, fmt.Errorf("stat(%s): %w", path, err)) 583 return 584 } 585 } 586 587 wc.extractCalls++ 588 589 start := time.Now() 590 results, err := ex.Extract(wc.ctx, &ScanInput{ 591 FS: wc.fs, 592 Path: path, 593 Root: wc.scanRoot, 594 Info: info, 595 Reader: rc, 596 }) 597 wc.stats.AfterExtractorRun(ex.Name(), &stats.AfterExtractorStats{ 598 Path: path, 599 Root: wc.scanRoot, 600 Runtime: time.Since(start), 601 Inventory: &results, 602 Error: err, 603 }) 604 605 if err != nil { 606 addErrToMap(wc.errors, ex.Name(), path, err) 607 } 608 609 if !results.IsEmpty() { 610 wc.foundInv[ex.Name()] = true 611 for _, r := range results.Packages { 612 r.Plugins = append(r.Plugins, ex.Name()) 613 if wc.storeAbsolutePath { 614 r.Locations = expandAbsolutePath(wc.scanRoot, r.Locations) 615 } 616 } 617 wc.inventory.Append(results) 618 } 619 } 620 621 // PrepareNewScan updates the scan root and the filesystem to use for the filesystem walk. 622 // It also resets the inventory. 623 // currentRoot is expected to be an absolute path. 
624 func (wc *walkContext) PrepareNewScan(absRoot string, fs scalibrfs.FS) error { 625 wc.scanRoot = absRoot 626 wc.fs = fs 627 wc.fileAPI.fs = fs 628 wc.inventory = inventory.Inventory{} 629 return nil 630 } 631 632 func expandAbsolutePath(scanRoot string, paths []string) []string { 633 var locations []string 634 for _, l := range paths { 635 locations = append(locations, filepath.Join(scanRoot, l)) 636 } 637 return locations 638 } 639 640 func expandAllAbsolutePaths(scanRoots []*scalibrfs.ScanRoot) ([]*scalibrfs.ScanRoot, error) { 641 var result []*scalibrfs.ScanRoot 642 for _, r := range scanRoots { 643 abs, err := r.WithAbsolutePath() 644 if err != nil { 645 return nil, err 646 } 647 result = append(result, abs) 648 } 649 650 return result, nil 651 } 652 653 func stripAllPathPrefixes(paths []string, scanRoots []*scalibrfs.ScanRoot) ([]string, error) { 654 if len(scanRoots) > 0 && scanRoots[0].IsVirtual() { 655 // We're using a virtual filesystem with no real absolute paths. 656 return paths, nil 657 } 658 result := make([]string, 0, len(paths)) 659 for _, p := range paths { 660 abs, err := filepath.Abs(p) 661 if err != nil { 662 return nil, err 663 } 664 665 rp, err := stripFromAtLeastOnePrefix(abs, scanRoots) 666 if err != nil { 667 return nil, err 668 } 669 result = append(result, rp) 670 } 671 672 return result, nil 673 } 674 675 // toSlashPaths returns a new []string that converts all paths to use / 676 func toSlashPaths(paths []string) []string { 677 returnPaths := make([]string, len(paths)) 678 for i, s := range paths { 679 returnPaths[i] = filepath.ToSlash(s) 680 } 681 682 return returnPaths 683 } 684 685 // stripFromAtLeastOnePrefix returns the path relative to the first prefix it is relative to. 686 // If the path is not relative to any of the prefixes, an error is returned. 687 // The path is expected to be absolute. 
688 func stripFromAtLeastOnePrefix(path string, scanRoots []*scalibrfs.ScanRoot) (string, error) { 689 for _, r := range scanRoots { 690 if !strings.HasPrefix(path, r.Path) { 691 continue 692 } 693 rel, err := filepath.Rel(r.Path, path) 694 if err != nil { 695 return "", err 696 } 697 698 return rel, nil 699 } 700 701 return "", ErrNotRelativeToScanRoots 702 } 703 704 func pathStringListToMap(paths []string) map[string]bool { 705 result := make(map[string]bool) 706 for _, p := range paths { 707 result[p] = true 708 } 709 return result 710 } 711 712 func addErrToMap(errors map[string]map[string]error, extractor string, path string, err error) { 713 if _, ok := errors[extractor]; !ok { 714 errors[extractor] = make(map[string]error) 715 } 716 errors[extractor][path] = err 717 } 718 719 func errToExtractorStatus(extractors []Extractor, foundInv map[string]bool, errs map[string]map[string]error) []*plugin.Status { 720 result := make([]*plugin.Status, 0, len(extractors)) 721 for _, ex := range extractors { 722 fileErrs := createFileErrorsForPlugin(errs[ex.Name()]) 723 result = append(result, plugin.StatusFromErr(ex, foundInv[ex.Name()], plugin.OverallErrFromFileErrs(fileErrs), fileErrs)) 724 } 725 return result 726 } 727 728 func createFileErrorsForPlugin(errorMap map[string]error) []*plugin.FileError { 729 if len(errorMap) == 0 { 730 return nil 731 } 732 733 var fileErrors []*plugin.FileError 734 for path, err := range errorMap { 735 fileErrors = append(fileErrors, &plugin.FileError{ 736 FilePath: path, 737 ErrorMessage: err.Error(), 738 }) 739 } 740 return fileErrors 741 } 742 743 func (wc *walkContext) printStatus() { 744 log.Infof("Status: new inodes: %d, %.1f inodes/s, new extract calls: %d, path: %q\n", 745 wc.inodesVisited-wc.lastInodes, 746 float64(wc.inodesVisited-wc.lastInodes)/time.Since(wc.lastStatus).Seconds(), 747 wc.extractCalls-wc.lastExtracts, wc.currentPath) 748 749 wc.lastStatus = time.Now() 750 wc.lastInodes = wc.inodesVisited 751 wc.lastExtracts = 
wc.extractCalls 752 } 753 754 // GetRealPath returns the real absolute path of the file on the scanning host's filesystem. 755 // If the file is on a virtual filesystem (e.g. a remote container), it is first copied into a 756 // temporary directory on the scanning host's filesystem. It's up to the caller to delete the 757 // directory once they're done using it. 758 func (i *ScanInput) GetRealPath() (string, error) { 759 return scalibrfs.GetRealPath(&scalibrfs.ScanRoot{FS: i.FS, Path: i.Root}, i.Path, i.Reader) 760 } 761 762 // TODO(b/380419487): This list is not exhaustive. We should add more extensions here. 763 var ( 764 unlikelyExecutableExtensions = map[string]bool{ 765 ".c": true, 766 ".cc": true, 767 ".cargo-ok": true, 768 ".crate": true, 769 ".css": true, 770 ".db": true, 771 ".gitattributes": true, 772 ".gitignore": true, 773 ".go": true, 774 ".h": true, 775 ".html": true, 776 ".jpg": true, 777 ".json": true, 778 ".lock": true, 779 ".log": true, 780 ".md": true, 781 ".mod": true, 782 ".png": true, 783 ".proto": true, 784 ".rs": true, 785 ".stderr": true, 786 ".sum": true, 787 ".svg": true, 788 ".tar": true, 789 ".tmpl": true, 790 ".toml": true, 791 ".txt": true, 792 ".woff2": true, 793 ".xml": true, 794 ".yaml": true, 795 ".yml": true, 796 ".zip": true, 797 ".ziphash": true, 798 } 799 800 // Always interesting binary extensions 801 likelyFileExts = map[string]bool{ 802 ".a": true, 803 // Binary extensions 804 ".bin": true, 805 ".elf": true, 806 ".run": true, 807 ".o": true, 808 // Windows Binary extensions: 809 ".exe": true, 810 ".dll": true, 811 812 // Shared library: true extension: true 813 ".so": true, 814 // and .so: true.[number] 815 816 // Script extensions: true 817 ".py": true, // Python 818 ".sh": true, // bash/sh/zsh 819 ".bash": true, 820 821 ".pl": true, // Perl 822 ".rb": true, // Ruby 823 ".php": true, // Php 824 ".awk": true, // Awk 825 ".tcl": true, // tcl 826 } 827 likelyFileExtRegexes = map[string]*regexp.Regexp{ 828 ".so.": 
regexp.MustCompile(`.so.\d+$`), 829 } 830 ) 831 832 // IsInterestingExecutable returns true if the specified file is an executable which may need scanning. 833 func IsInterestingExecutable(api FileAPI) bool { 834 path := api.Path() 835 extension := filepath.Ext(path) 836 if unlikelyExecutableExtensions[extension] { 837 return false 838 } 839 840 if likelyFileExts[extension] { 841 return true 842 } 843 844 for substrTest, regex := range likelyFileExtRegexes { 845 if strings.Contains(path, substrTest) && regex.MatchString(path) { 846 return true 847 } 848 } 849 850 mode, err := api.Stat() 851 return err == nil && mode.Mode()&0111 != 0 852 } 853 854 func fileSize(file FileAPI) (int64, error) { 855 info, err := file.Stat() 856 if err != nil { 857 return 0, err 858 } 859 return info.Size(), nil 860 }