// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package scalibr provides an interface for running software inventory
// extraction and security finding detection on a machine.
package scalibr

import (
	"cmp"
	"context"
	"errors"
	"fmt"
	"os"
	"regexp"
	"runtime"
	"slices"
	"time"

	"github.com/gobwas/glob"
	"github.com/google/osv-scalibr/annotator"
	"github.com/google/osv-scalibr/artifact/image"
	"github.com/google/osv-scalibr/artifact/image/layerscanning/trace"
	"github.com/google/osv-scalibr/detector"
	"github.com/google/osv-scalibr/detector/detectorrunner"
	"github.com/google/osv-scalibr/enricher"
	ce "github.com/google/osv-scalibr/enricher/secrets/convert"
	"github.com/google/osv-scalibr/extractor"
	"github.com/google/osv-scalibr/extractor/filesystem"
	cf "github.com/google/osv-scalibr/extractor/filesystem/secrets/convert"
	"github.com/google/osv-scalibr/extractor/standalone"
	scalibrfs "github.com/google/osv-scalibr/fs"
	"github.com/google/osv-scalibr/inventory"
	"github.com/google/osv-scalibr/log"
	"github.com/google/osv-scalibr/packageindex"
	"github.com/google/osv-scalibr/plugin"
	pl "github.com/google/osv-scalibr/plugin/list"
	"github.com/google/osv-scalibr/result"
	"github.com/google/osv-scalibr/stats"
	"github.com/google/osv-scalibr/version"
	"go.uber.org/multierr"

	cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto"
)

// Sentinel errors reported through the scan result when the ScanConfig fails
// the up-front validation in Scan.
var (
	errNoScanRoot            = errors.New("no scan root specified")
	errFilesWithSeveralRoots = errors.New("can't extract specific files with several scan roots")
)

// Scanner is the main entry point of the scanner.
type Scanner struct{}

// New creates a new scanner instance.
func New() *Scanner { return &Scanner{} }

// ScanConfig stores the config settings of a scan run such as the plugins to
// use and the dir to consider the root of the scanned system.
type ScanConfig struct {
	// Plugins is the list of plugins to run during the scan.
	Plugins []plugin.Plugin
	// Capabilities that the scanning environment satisfies, e.g. whether there's
	// network access. Some plugins can only run if certain requirements are met.
	Capabilities *plugin.Capabilities
	// ScanRoots contains the list of root dirs used by file walking during extraction.
	// All extractors and detectors will assume files are relative to these dirs.
	// Example use case: Scanning a container image or source code repo that is
	// mounted to a local dir.
	ScanRoots []*scalibrfs.ScanRoot
	// Optional: Individual file or dir paths to extract inventory from. If specified,
	// the extractors will only look at the specified files or at the contents of the
	// specified directories during the filesystem traversal.
	// Note that on real filesystems these are not relative to the ScanRoots and
	// thus need to be in sub-directories of one of the ScanRoots.
	// Scan rejects a config that sets this together with more than one scan root.
	PathsToExtract []string
	// Optional: If true, only the files in the top-level directories in PathsToExtract are
	// extracted and sub-directories are ignored.
	IgnoreSubDirs bool
	// Optional: Directories that the file system walk should ignore.
	// Note that on real filesystems these are not relative to the ScanRoots and
	// thus need to be in sub-directories of one of the ScanRoots.
	// TODO(b/279413691): Also skip local paths, e.g. "Skip all .git dirs"
	DirsToSkip []string
	// Optional: If the regex matches a directory, it will be skipped.
	SkipDirRegex *regexp.Regexp
	// Optional: If the glob matches a directory, it will be skipped.
	SkipDirGlob glob.Glob
	// Optional: Files larger than this size in bytes are skipped. If 0, no limit is applied.
	MaxFileSize int
	// Optional: Skip files declared in .gitignore files in source repos.
	UseGitignore bool
	// Optional: Stats allows registering a metric hook. If left nil, no metrics
	// will be recorded (Scan substitutes a no-op collector).
	Stats stats.Collector
	// Optional: Whether to read symlinks.
	ReadSymlinks bool
	// Optional: Limit for visited inodes. If 0, no limit is applied.
	MaxInodes int
	// Optional: By default, inventories store a path relative to the scan root. If
	// StoreAbsolutePath is set, the absolute path is stored instead.
	StoreAbsolutePath bool
	// Optional: If true, print a detailed analysis of the duration of each extractor.
	PrintDurationAnalysis bool
	// Optional: If true, fail the scan if any permission errors are encountered.
	ErrorOnFSErrors bool
	// Optional: If set, this function is called for each file to check if there is a specific
	// extractor for this file. If it returns an extractor, only that extractor is used for the file.
	ExtractorOverride func(filesystem.FileAPI) []filesystem.Extractor
	// Optional: If set, SCALIBR returns an error when a plugin's required plugin
	// isn't configured instead of enabling required plugins automatically.
	ExplicitPlugins bool
	// Optional: Configuration to apply to auto-enabled required plugins.
	RequiredPluginConfig *cpb.PluginConfig
}

// EnableRequiredPlugins adds those plugins to the config that are required by enabled
// plugins (such as Detectors or Enrichers) but have not been explicitly enabled.
126 func (cfg *ScanConfig) EnableRequiredPlugins() error { 127 enabledPlugins := map[string]struct{}{} 128 for _, e := range cfg.Plugins { 129 enabledPlugins[e.Name()] = struct{}{} 130 } 131 132 requiredPlugins := map[string]struct{}{} 133 for _, p := range cfg.Plugins { 134 if d, ok := p.(detector.Detector); ok { 135 for _, req := range d.RequiredExtractors() { 136 requiredPlugins[req] = struct{}{} 137 } 138 } 139 if e, ok := p.(enricher.Enricher); ok { 140 for _, req := range e.RequiredPlugins() { 141 requiredPlugins[req] = struct{}{} 142 } 143 } 144 } 145 146 for p := range requiredPlugins { 147 if _, enabled := enabledPlugins[p]; enabled { 148 continue 149 } 150 if cfg.ExplicitPlugins { 151 // Plugins need to be explicitly enabled, 152 // so we log an error instead of auto-enabling them. 153 return fmt.Errorf("required plugin %q not enabled", p) 154 } 155 156 requiredPlugin, err := pl.FromName(p, cfg.RequiredPluginConfig) 157 // TODO: b/416106602 - Implement transitive enablement for required enrichers. 158 if err != nil { 159 return fmt.Errorf("required plugin %q not present in any list.go: %w", p, err) 160 } 161 enabledPlugins[p] = struct{}{} 162 cfg.Plugins = append(cfg.Plugins, requiredPlugin) 163 } 164 return nil 165 } 166 167 // ValidatePluginRequirements checks that the scanning environment's capabilities satisfy 168 // the requirements of all enabled plugin. 169 func (cfg *ScanConfig) ValidatePluginRequirements() error { 170 errs := []error{} 171 for _, p := range cfg.Plugins { 172 if err := plugin.ValidateRequirements(p, cfg.Capabilities); err != nil { 173 errs = append(errs, err) 174 } 175 } 176 return errors.Join(errs...) 177 } 178 179 // LINT.IfChange 180 181 // ScanResult stores the results of a scan incl. scan status and inventory found. 182 // TODO: b/425645186 - Remove this alias once all callers are migrated to the result package. 
183 type ScanResult = result.ScanResult 184 185 // LINT.ThenChange(/binary/proto/scan_result.proto) 186 187 // Scan executes the extraction/detection/annotation/etc. plugins using the provided scan config. 188 func (Scanner) Scan(ctx context.Context, config *ScanConfig) (sr *ScanResult) { 189 if config.Stats == nil { 190 config.Stats = stats.NoopCollector{} 191 } 192 defer func() { 193 config.Stats.AfterScan(time.Since(sr.StartTime), sr.Status) 194 }() 195 sro := &newScanResultOptions{ 196 StartTime: time.Now(), 197 } 198 if err := config.EnableRequiredPlugins(); err != nil { 199 sro.Err = err 200 } else if err := config.ValidatePluginRequirements(); err != nil { 201 sro.Err = err 202 } else if len(config.ScanRoots) == 0 { 203 sro.Err = errNoScanRoot 204 } else if len(config.PathsToExtract) > 0 && len(config.ScanRoots) > 1 { 205 sro.Err = errFilesWithSeveralRoots 206 } 207 if sro.Err != nil { 208 sro.EndTime = time.Now() 209 return newScanResult(sro) 210 } 211 extractors := pl.FilesystemExtractors(config.Plugins) 212 extractors, err := cf.SetupVelesExtractors(extractors) 213 if err != nil { 214 sro.Err = multierr.Append(sro.Err, err) 215 sro.EndTime = time.Now() 216 return newScanResult(sro) 217 } 218 extractorConfig := &filesystem.Config{ 219 Stats: config.Stats, 220 ReadSymlinks: config.ReadSymlinks, 221 Extractors: extractors, 222 PathsToExtract: config.PathsToExtract, 223 IgnoreSubDirs: config.IgnoreSubDirs, 224 DirsToSkip: config.DirsToSkip, 225 SkipDirRegex: config.SkipDirRegex, 226 MaxFileSize: config.MaxFileSize, 227 SkipDirGlob: config.SkipDirGlob, 228 UseGitignore: config.UseGitignore, 229 ScanRoots: config.ScanRoots, 230 MaxInodes: config.MaxInodes, 231 StoreAbsolutePath: config.StoreAbsolutePath, 232 PrintDurationAnalysis: config.PrintDurationAnalysis, 233 ErrorOnFSErrors: config.ErrorOnFSErrors, 234 ExtractorOverride: config.ExtractorOverride, 235 } 236 inv, extractorStatus, err := filesystem.Run(ctx, extractorConfig) 237 if err != nil { 238 sro.Err = 
err 239 sro.EndTime = time.Now() 240 return newScanResult(sro) 241 } 242 243 sro.Inventory = inv 244 // Defer cleanup of all temporary files and directories created during extraction. 245 // This function iterates over all EmbeddedFS entries in the inventory and 246 // removes their associated TempPaths. 247 // Any failures during removal are logged but do not interrupt execution. 248 defer func() { 249 for _, embeddedFS := range sro.Inventory.EmbeddedFSs { 250 for _, tmpPath := range embeddedFS.TempPaths { 251 if err := os.RemoveAll(tmpPath); err != nil { 252 log.Infof("Failed to remove %s", tmpPath) 253 } 254 } 255 } 256 }() 257 sro.PluginStatus = append(sro.PluginStatus, extractorStatus...) 258 sysroot := config.ScanRoots[0] 259 standaloneCfg := &standalone.Config{ 260 Extractors: pl.StandaloneExtractors(config.Plugins), 261 ScanRoot: &scalibrfs.ScanRoot{FS: sysroot.FS, Path: sysroot.Path}, 262 } 263 standaloneInv, standaloneStatus, err := standalone.Run(ctx, standaloneCfg) 264 if err != nil { 265 sro.Err = err 266 sro.EndTime = time.Now() 267 return newScanResult(sro) 268 } 269 270 sro.Inventory.Append(standaloneInv) 271 sro.PluginStatus = append(sro.PluginStatus, standaloneStatus...) 272 273 px, err := packageindex.New(sro.Inventory.Packages) 274 if err != nil { 275 sro.Err = err 276 sro.EndTime = time.Now() 277 return newScanResult(sro) 278 } 279 280 findings, detectorStatus, err := detectorrunner.Run( 281 ctx, config.Stats, pl.Detectors(config.Plugins), &scalibrfs.ScanRoot{FS: sysroot.FS, Path: sysroot.Path}, px, 282 ) 283 sro.Inventory.PackageVulns = findings.PackageVulns 284 sro.Inventory.GenericFindings = findings.GenericFindings 285 sro.PluginStatus = append(sro.PluginStatus, detectorStatus...) 
286 if err != nil { 287 sro.Err = err 288 } 289 290 annotatorCfg := &annotator.Config{ 291 Annotators: pl.Annotators(config.Plugins), 292 ScanRoot: sysroot, 293 } 294 annotatorStatus, err := annotator.Run(ctx, annotatorCfg, &sro.Inventory) 295 sro.PluginStatus = append(sro.PluginStatus, annotatorStatus...) 296 if err != nil { 297 sro.Err = multierr.Append(sro.Err, err) 298 } 299 300 enrichers := pl.Enrichers(config.Plugins) 301 enrichers, err = ce.SetupVelesEnrichers(enrichers) 302 if err != nil { 303 sro.Err = multierr.Append(sro.Err, err) 304 sro.EndTime = time.Now() 305 return newScanResult(sro) 306 } 307 enricherCfg := &enricher.Config{ 308 Enrichers: enrichers, 309 ScanRoot: &scalibrfs.ScanRoot{ 310 FS: sysroot.FS, 311 Path: sysroot.Path, 312 }, 313 } 314 enricherStatus, err := enricher.Run(ctx, enricherCfg, &sro.Inventory) 315 sro.PluginStatus = append(sro.PluginStatus, enricherStatus...) 316 if err != nil { 317 sro.Err = multierr.Append(sro.Err, err) 318 } 319 320 sro.EndTime = time.Now() 321 return newScanResult(sro) 322 } 323 324 // ScanContainer scans the provided container image for packages and security findings using the 325 // provided scan config. It populates the LayerDetails field of the packages with the origin layer 326 // details. Functions to create an Image from a tarball, remote name, or v1.Image are available in 327 // the artifact/image/layerscanning/image package. 328 func (s Scanner) ScanContainer(ctx context.Context, img image.Image, config *ScanConfig) (sr *ScanResult, err error) { 329 if len(config.ScanRoots) > 0 { 330 log.Warnf("expected no scan roots, but got %d scan roots, overwriting with container image scan root", len(config.ScanRoots)) 331 } 332 333 imagefs := img.FS() 334 // Overwrite the scan roots with the chain layer filesystem. 
335 config.ScanRoots = []*scalibrfs.ScanRoot{ 336 { 337 FS: imagefs, 338 }, 339 } 340 341 storeAbsPath := config.StoreAbsolutePath 342 // Don't try and store absolute path because on windows it will turn unix paths into 343 // Windows paths. 344 config.StoreAbsolutePath = false 345 346 // Suppress running enrichers until after layer details are populated. 347 var enrichers []enricher.Enricher 348 var nonEnricherPlugins []plugin.Plugin 349 350 for _, p := range config.Plugins { 351 if e, ok := p.(enricher.Enricher); ok { 352 enrichers = append(enrichers, e) 353 } else { 354 nonEnricherPlugins = append(nonEnricherPlugins, p) 355 } 356 } 357 config.Plugins = nonEnricherPlugins 358 359 chainLayers, err := img.ChainLayers() 360 if err != nil { 361 return nil, fmt.Errorf("failed to get chain layers: %w", err) 362 } 363 364 scanResult := s.Scan(ctx, config) 365 extractors := pl.FilesystemExtractors(config.Plugins) 366 extractors, err = cf.SetupVelesExtractors(extractors) 367 if err != nil { 368 return scanResult, err 369 } 370 extractorConfig := &filesystem.Config{ 371 Stats: config.Stats, 372 ReadSymlinks: config.ReadSymlinks, 373 Extractors: extractors, 374 PathsToExtract: config.PathsToExtract, 375 IgnoreSubDirs: config.IgnoreSubDirs, 376 DirsToSkip: config.DirsToSkip, 377 SkipDirRegex: config.SkipDirRegex, 378 MaxFileSize: config.MaxFileSize, 379 SkipDirGlob: config.SkipDirGlob, 380 UseGitignore: config.UseGitignore, 381 ScanRoots: config.ScanRoots, 382 MaxInodes: config.MaxInodes, 383 StoreAbsolutePath: config.StoreAbsolutePath, 384 PrintDurationAnalysis: config.PrintDurationAnalysis, 385 ErrorOnFSErrors: config.ErrorOnFSErrors, 386 ExtractorOverride: config.ExtractorOverride, 387 } 388 389 // Populate the LayerDetails field of the inventory by tracing the layer origins. 
390 trace.PopulateLayerDetails(ctx, &scanResult.Inventory, chainLayers, pl.FilesystemExtractors(config.Plugins), extractorConfig) 391 392 // Since we skipped storing absolute path in the main Scan function. 393 // Actually convert it to absolute path here. 394 if storeAbsPath { 395 for _, pkg := range scanResult.Inventory.Packages { 396 for i := range pkg.Locations { 397 pkg.Locations[i] = "/" + pkg.Locations[i] 398 } 399 } 400 } 401 402 // Run enrichers with the updated inventory. 403 enrichers, err = ce.SetupVelesEnrichers(enrichers) 404 if err != nil { 405 scanResult.Status.Status = plugin.ScanStatusFailed 406 scanResult.Status.FailureReason = err.Error() 407 return scanResult, nil //nolint:nilerr // Errors are returned in the scanResult. 408 } 409 enricherCfg := &enricher.Config{ 410 Enrichers: enrichers, 411 ScanRoot: &scalibrfs.ScanRoot{ 412 FS: imagefs, 413 }, 414 } 415 enricherStatus, err := enricher.Run(ctx, enricherCfg, &scanResult.Inventory) 416 scanResult.PluginStatus = append(scanResult.PluginStatus, enricherStatus...) 417 if err != nil { 418 scanResult.Status.Status = plugin.ScanStatusFailed 419 scanResult.Status.FailureReason = err.Error() 420 } 421 422 // Keep the img variable alive till the end incase cleanup is not called on the parent. 423 runtime.KeepAlive(img) 424 425 return scanResult, nil 426 } 427 428 type newScanResultOptions struct { 429 StartTime time.Time 430 EndTime time.Time 431 PluginStatus []*plugin.Status 432 Inventory inventory.Inventory 433 Err error 434 } 435 436 func newScanResult(o *newScanResultOptions) *ScanResult { 437 status := &plugin.ScanStatus{} 438 if o.Err != nil { 439 status.Status = plugin.ScanStatusFailed 440 status.FailureReason = o.Err.Error() 441 } else { 442 status.Status = plugin.ScanStatusSucceeded 443 // If any plugin failed, set the overall scan status to partially succeeded. 
444 for _, pluginStatus := range o.PluginStatus { 445 if pluginStatus.Status.Status == plugin.ScanStatusFailed { 446 status.Status = plugin.ScanStatusPartiallySucceeded 447 status.FailureReason = "not all plugins succeeded, see the plugin statuses" 448 break 449 } 450 } 451 } 452 r := &ScanResult{ 453 StartTime: o.StartTime, 454 EndTime: o.EndTime, 455 Version: version.ScannerVersion, 456 Status: status, 457 PluginStatus: o.PluginStatus, 458 Inventory: o.Inventory, 459 } 460 461 // Sort results for better diffing. 462 sortResults(r) 463 return r 464 } 465 466 // sortResults sorts the result to make the output deterministic and diffable. 467 func sortResults(results *ScanResult) { 468 slices.SortFunc(results.PluginStatus, cmpStatus) 469 slices.SortFunc(results.Inventory.Packages, CmpPackages) 470 slices.SortFunc(results.Inventory.PackageVulns, cmpPackageVulns) 471 slices.SortFunc(results.Inventory.GenericFindings, cmpGenericFindings) 472 } 473 474 // CmpPackages is a comparison helper fun to be used for sorting Package structs. 
475 func CmpPackages(a, b *extractor.Package) int { 476 res := cmp.Or( 477 cmp.Compare(a.Name, b.Name), 478 cmp.Compare(a.Version, b.Version), 479 cmp.Compare(len(a.Plugins), len(b.Plugins)), 480 ) 481 if res != 0 { 482 return res 483 } 484 485 res = 0 486 for i := range a.Plugins { 487 res = cmp.Or(res, cmp.Compare(a.Plugins[i], b.Plugins[i])) 488 } 489 if res != 0 { 490 return res 491 } 492 493 aloc := fmt.Sprintf("%v", a.Locations) 494 bloc := fmt.Sprintf("%v", b.Locations) 495 return cmp.Compare(aloc, bloc) 496 } 497 498 func cmpStatus(a, b *plugin.Status) int { 499 return cmpString(a.Name, b.Name) 500 } 501 502 func cmpPackageVulns(a, b *inventory.PackageVuln) int { 503 return cmpString(a.Vulnerability.Id, b.Vulnerability.Id) 504 } 505 506 func cmpGenericFindings(a, b *inventory.GenericFinding) int { 507 if a.Adv.ID.Reference != b.Adv.ID.Reference { 508 return cmpString(a.Adv.ID.Reference, b.Adv.ID.Reference) 509 } 510 return cmpString(a.Target.Extra, b.Target.Extra) 511 } 512 513 func cmpString(a, b string) int { 514 if a < b { 515 return -1 516 } else if a > b { 517 return 1 518 } 519 return 0 520 }