github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/fanal/analyzer/analyzer.go (about) 1 package analyzer 2 3 import ( 4 "context" 5 "errors" 6 "io/fs" 7 "os" 8 "regexp" 9 "sort" 10 "strings" 11 "sync" 12 13 "github.com/samber/lo" 14 "golang.org/x/exp/slices" 15 "golang.org/x/sync/semaphore" 16 "golang.org/x/xerrors" 17 18 dio "github.com/aquasecurity/go-dep-parser/pkg/io" 19 fos "github.com/devseccon/trivy/pkg/fanal/analyzer/os" 20 "github.com/devseccon/trivy/pkg/fanal/log" 21 "github.com/devseccon/trivy/pkg/fanal/types" 22 "github.com/devseccon/trivy/pkg/misconf" 23 ) 24 25 var ( 26 analyzers = make(map[Type]analyzer) 27 postAnalyzers = make(map[Type]postAnalyzerInitialize) 28 29 // ErrUnknownOS occurs when unknown OS is analyzed. 30 ErrUnknownOS = xerrors.New("unknown OS") 31 // ErrPkgAnalysis occurs when the analysis of packages is failed. 32 ErrPkgAnalysis = xerrors.New("failed to analyze packages") 33 // ErrNoPkgsDetected occurs when the required files for an OS package manager are not detected 34 ErrNoPkgsDetected = xerrors.New("no packages detected") 35 ) 36 37 ////////////////////// 38 // Analyzer options // 39 ////////////////////// 40 41 // AnalyzerOptions is used to initialize analyzers 42 type AnalyzerOptions struct { 43 Group Group 44 Parallel int 45 FilePatterns []string 46 DisabledAnalyzers []Type 47 MisconfScannerOption misconf.ScannerOption 48 SecretScannerOption SecretScannerOption 49 LicenseScannerOption LicenseScannerOption 50 } 51 52 type SecretScannerOption struct { 53 ConfigPath string 54 } 55 56 type LicenseScannerOption struct { 57 // Use license classifier to get better results though the classification is expensive. 58 Full bool 59 ClassifierConfidenceLevel float64 60 } 61 62 //////////////// 63 // Interfaces // 64 //////////////// 65 66 // Initializer represents analyzers that need to take parameters from users 67 type Initializer interface { 68 Init(AnalyzerOptions) error 69 } 70 71 type analyzer interface { 72 Type() Type 73 Version() int 74 Analyze(ctx context.Context, input AnalysisInput) (*AnalysisResult, error) 75 Required(filePath string, info os.FileInfo) bool 76 } 77 78 type PostAnalyzer interface { 79 Type() Type 80 Version() int 81 PostAnalyze(ctx context.Context, input PostAnalysisInput) (*AnalysisResult, error) 82 Required(filePath string, info os.FileInfo) bool 83 } 84 85 //////////////////// 86 // Analyzer group // 87 //////////////////// 88 89 type Group string 90 91 const GroupBuiltin Group = "builtin" 92 93 func RegisterAnalyzer(analyzer analyzer) { 94 if _, ok := analyzers[analyzer.Type()]; ok { 95 log.Logger.Fatalf("analyzer %s is registered twice", analyzer.Type()) 96 } 97 analyzers[analyzer.Type()] = analyzer 98 } 99 100 type postAnalyzerInitialize func(options AnalyzerOptions) (PostAnalyzer, error) 101 102 func RegisterPostAnalyzer(t Type, initializer postAnalyzerInitialize) { 103 if _, ok := postAnalyzers[t]; ok { 104 log.Logger.Fatalf("analyzer %s is registered twice", t) 105 } 106 postAnalyzers[t] = initializer 107 } 108 109 // DeregisterAnalyzer is mainly for testing 110 func DeregisterAnalyzer(t Type) { 111 delete(analyzers, t) 112 } 113 114 // CustomGroup returns a group name for custom analyzers 115 // This is mainly intended to be used in DevSecCon products. 116 type CustomGroup interface { 117 Group() Group 118 } 119 120 type Opener func() (dio.ReadSeekCloserAt, error) 121 122 type AnalyzerGroup struct { 123 analyzers []analyzer 124 postAnalyzers []PostAnalyzer 125 filePatterns map[Type][]*regexp.Regexp 126 } 127 128 /////////////////////////// 129 // Analyzer input/output // 130 /////////////////////////// 131 132 type AnalysisInput struct { 133 Dir string 134 FilePath string 135 Info os.FileInfo 136 Content dio.ReadSeekerAt 137 138 Options AnalysisOptions 139 } 140 141 type PostAnalysisInput struct { 142 FS fs.FS 143 Options AnalysisOptions 144 } 145 146 type AnalysisOptions struct { 147 Offline bool 148 FileChecksum bool 149 } 150 151 type AnalysisResult struct { 152 m sync.Mutex 153 OS types.OS 154 Repository *types.Repository 155 PackageInfos []types.PackageInfo 156 Applications []types.Application 157 Misconfigurations []types.Misconfiguration 158 Secrets []types.Secret 159 Licenses []types.LicenseFile 160 SystemInstalledFiles []string // A list of files installed by OS package manager 161 162 // Digests contains SHA-256 digests of unpackaged files 163 // used to search for SBOM attestation. 164 Digests map[string]string 165 166 // For Red Hat 167 BuildInfo *types.BuildInfo 168 169 // CustomResources hold analysis results from custom analyzers. 170 // It is for extensibility and not used in OSS. 171 CustomResources []types.CustomResource 172 } 173 174 func NewAnalysisResult() *AnalysisResult { 175 result := new(AnalysisResult) 176 return result 177 } 178 179 func (r *AnalysisResult) isEmpty() bool { 180 return lo.IsEmpty(r.OS) && r.Repository == nil && len(r.PackageInfos) == 0 && len(r.Applications) == 0 && 181 len(r.Misconfigurations) == 0 && len(r.Secrets) == 0 && len(r.Licenses) == 0 && len(r.SystemInstalledFiles) == 0 && 182 r.BuildInfo == nil && len(r.Digests) == 0 && len(r.CustomResources) == 0 183 } 184 185 func (r *AnalysisResult) Sort() { 186 // OS packages 187 sort.Slice(r.PackageInfos, func(i, j int) bool { 188 return r.PackageInfos[i].FilePath < r.PackageInfos[j].FilePath 189 }) 190 191 for _, pi := range r.PackageInfos { 192 sort.Sort(pi.Packages) 193 } 194 195 // Language-specific packages 196 sort.Slice(r.Applications, func(i, j int) bool { 197 if r.Applications[i].FilePath != r.Applications[j].FilePath { 198 return r.Applications[i].FilePath < r.Applications[j].FilePath 199 } 200 return r.Applications[i].Type < r.Applications[j].Type 201 }) 202 203 for _, app := range r.Applications { 204 sort.Sort(app.Libraries) 205 } 206 207 // Custom resources 208 sort.Slice(r.CustomResources, func(i, j int) bool { 209 return r.CustomResources[i].FilePath < r.CustomResources[j].FilePath 210 }) 211 212 // Misconfigurations 213 sort.Slice(r.Misconfigurations, func(i, j int) bool { 214 return r.Misconfigurations[i].FilePath < r.Misconfigurations[j].FilePath 215 }) 216 217 // Secrets 218 sort.Slice(r.Secrets, func(i, j int) bool { 219 return r.Secrets[i].FilePath < r.Secrets[j].FilePath 220 }) 221 for _, sec := range r.Secrets { 222 sort.Slice(sec.Findings, func(i, j int) bool { 223 if sec.Findings[i].RuleID != sec.Findings[j].RuleID { 224 return sec.Findings[i].RuleID < sec.Findings[j].RuleID 225 } 226 return sec.Findings[i].StartLine < sec.Findings[j].StartLine 227 }) 228 } 229 230 // License files 231 sort.Slice(r.Licenses, func(i, j int) bool { 232 if r.Licenses[i].Type == r.Licenses[j].Type { 233 if r.Licenses[i].FilePath == r.Licenses[j].FilePath { 234 return r.Licenses[i].Layer.DiffID < r.Licenses[j].Layer.DiffID 235 } else { 236 return r.Licenses[i].FilePath < r.Licenses[j].FilePath 237 } 238 } 239 240 return r.Licenses[i].Type < r.Licenses[j].Type 241 }) 242 } 243 244 func (r *AnalysisResult) Merge(newResult *AnalysisResult) { 245 if newResult == nil || newResult.isEmpty() { 246 return 247 } 248 249 // this struct is accessed by multiple goroutines 250 r.m.Lock() 251 defer r.m.Unlock() 252 253 r.OS.Merge(newResult.OS) 254 255 if newResult.Repository != nil { 256 r.Repository = newResult.Repository 257 } 258 259 if len(newResult.PackageInfos) > 0 { 260 r.PackageInfos = append(r.PackageInfos, newResult.PackageInfos...) 261 } 262 263 if len(newResult.Applications) > 0 { 264 r.Applications = append(r.Applications, newResult.Applications...) 265 } 266 267 // Merge SHA-256 digests of unpackaged files 268 if newResult.Digests != nil { 269 r.Digests = lo.Assign(r.Digests, newResult.Digests) 270 } 271 272 r.Misconfigurations = append(r.Misconfigurations, newResult.Misconfigurations...) 273 r.Secrets = append(r.Secrets, newResult.Secrets...) 274 r.Licenses = append(r.Licenses, newResult.Licenses...) 275 r.SystemInstalledFiles = append(r.SystemInstalledFiles, newResult.SystemInstalledFiles...) 276 277 if newResult.BuildInfo != nil { 278 if r.BuildInfo == nil { 279 r.BuildInfo = newResult.BuildInfo 280 } else { 281 // We don't need to merge build info here 282 // because there is theoretically only one file about build info in each layer. 283 if newResult.BuildInfo.Nvr != "" || newResult.BuildInfo.Arch != "" { 284 r.BuildInfo.Nvr = newResult.BuildInfo.Nvr 285 r.BuildInfo.Arch = newResult.BuildInfo.Arch 286 } 287 if len(newResult.BuildInfo.ContentSets) > 0 { 288 r.BuildInfo.ContentSets = newResult.BuildInfo.ContentSets 289 } 290 } 291 } 292 293 r.CustomResources = append(r.CustomResources, newResult.CustomResources...) 294 } 295 296 func belongToGroup(groupName Group, analyzerType Type, disabledAnalyzers []Type, analyzer any) bool { 297 if slices.Contains(disabledAnalyzers, analyzerType) { 298 return false 299 } 300 301 analyzerGroupName := GroupBuiltin 302 if cg, ok := analyzer.(CustomGroup); ok { 303 analyzerGroupName = cg.Group() 304 } 305 if analyzerGroupName != groupName { 306 return false 307 } 308 309 return true 310 } 311 312 const separator = ":" 313 314 func NewAnalyzerGroup(opt AnalyzerOptions) (AnalyzerGroup, error) { 315 groupName := opt.Group 316 if groupName == "" { 317 groupName = GroupBuiltin 318 } 319 320 group := AnalyzerGroup{ 321 filePatterns: make(map[Type][]*regexp.Regexp), 322 } 323 for _, p := range opt.FilePatterns { 324 // e.g. "dockerfile:my_dockerfile_*" 325 s := strings.SplitN(p, separator, 2) 326 if len(s) != 2 { 327 return group, xerrors.Errorf("invalid file pattern (%s) expected format: \"fileType:regexPattern\" e.g. \"dockerfile:my_dockerfile_*\"", p) 328 } 329 330 fileType, pattern := s[0], s[1] 331 r, err := regexp.Compile(pattern) 332 if err != nil { 333 return group, xerrors.Errorf("invalid file regexp (%s): %w", p, err) 334 } 335 336 if _, ok := group.filePatterns[Type(fileType)]; !ok { 337 group.filePatterns[Type(fileType)] = []*regexp.Regexp{} 338 } 339 340 group.filePatterns[Type(fileType)] = append(group.filePatterns[Type(fileType)], r) 341 } 342 343 for analyzerType, a := range analyzers { 344 if !belongToGroup(groupName, analyzerType, opt.DisabledAnalyzers, a) { 345 continue 346 } 347 // Initialize only scanners that have Init() 348 if ini, ok := a.(Initializer); ok { 349 if err := ini.Init(opt); err != nil { 350 return AnalyzerGroup{}, xerrors.Errorf("analyzer initialization error: %w", err) 351 } 352 } 353 group.analyzers = append(group.analyzers, a) 354 } 355 356 for analyzerType, init := range postAnalyzers { 357 a, err := init(opt) 358 if err != nil { 359 return AnalyzerGroup{}, xerrors.Errorf("post-analyzer init error: %w", err) 360 } 361 if !belongToGroup(groupName, analyzerType, opt.DisabledAnalyzers, a) { 362 continue 363 } 364 group.postAnalyzers = append(group.postAnalyzers, a) 365 } 366 367 return group, nil 368 } 369 370 type Versions struct { 371 Analyzers map[string]int 372 PostAnalyzers map[string]int 373 } 374 375 // AnalyzerVersions returns analyzer version identifier used for cache keys. 376 func (ag AnalyzerGroup) AnalyzerVersions() Versions { 377 analyzerVersions := make(map[string]int) 378 for _, a := range ag.analyzers { 379 analyzerVersions[string(a.Type())] = a.Version() 380 } 381 postAnalyzerVersions := make(map[string]int) 382 for _, a := range ag.postAnalyzers { 383 postAnalyzerVersions[string(a.Type())] = a.Version() 384 } 385 return Versions{ 386 Analyzers: analyzerVersions, 387 PostAnalyzers: postAnalyzerVersions, 388 } 389 } 390 391 // AnalyzeFile determines which files are required by the analyzers based on the file name and attributes, 392 // and passes only those files to the analyzer for analysis. 393 // This function may be called concurrently and must be thread-safe. 394 func (ag AnalyzerGroup) AnalyzeFile(ctx context.Context, wg *sync.WaitGroup, limit *semaphore.Weighted, result *AnalysisResult, 395 dir, filePath string, info os.FileInfo, opener Opener, disabled []Type, opts AnalysisOptions) error { 396 if info.IsDir() { 397 return nil 398 } 399 400 // filepath extracted from tar file doesn't have the prefix "/" 401 cleanPath := strings.TrimLeft(filePath, "/") 402 403 for _, a := range ag.analyzers { 404 // Skip disabled analyzers 405 if slices.Contains(disabled, a.Type()) { 406 continue 407 } 408 409 if !ag.filePatternMatch(a.Type(), cleanPath) && !a.Required(cleanPath, info) { 410 continue 411 } 412 rc, err := opener() 413 if errors.Is(err, fs.ErrPermission) { 414 log.Logger.Debugf("Permission error: %s", filePath) 415 break 416 } else if err != nil { 417 return xerrors.Errorf("unable to open %s: %w", filePath, err) 418 } 419 420 if err = limit.Acquire(ctx, 1); err != nil { 421 return xerrors.Errorf("semaphore acquire: %w", err) 422 } 423 wg.Add(1) 424 425 go func(a analyzer, rc dio.ReadSeekCloserAt) { 426 defer limit.Release(1) 427 defer wg.Done() 428 defer rc.Close() 429 430 ret, err := a.Analyze(ctx, AnalysisInput{ 431 Dir: dir, 432 FilePath: filePath, 433 Info: info, 434 Content: rc, 435 Options: opts, 436 }) 437 if err != nil && !errors.Is(err, fos.AnalyzeOSError) { 438 log.Logger.Debugf("Analysis error: %s", err) 439 return 440 } 441 result.Merge(ret) 442 }(a, rc) 443 } 444 445 return nil 446 } 447 448 // RequiredPostAnalyzers returns a list of analyzer types that require the given file. 449 func (ag AnalyzerGroup) RequiredPostAnalyzers(filePath string, info os.FileInfo) []Type { 450 if info.IsDir() { 451 return nil 452 } 453 var postAnalyzerTypes []Type 454 for _, a := range ag.postAnalyzers { 455 if ag.filePatternMatch(a.Type(), filePath) || a.Required(filePath, info) { 456 postAnalyzerTypes = append(postAnalyzerTypes, a.Type()) 457 } 458 } 459 return postAnalyzerTypes 460 } 461 462 // PostAnalyze passes a virtual filesystem containing only required files 463 // and passes it to the respective post-analyzer. 464 // The obtained results are merged into the "result". 465 // This function may be called concurrently and must be thread-safe. 466 func (ag AnalyzerGroup) PostAnalyze(ctx context.Context, compositeFS *CompositeFS, result *AnalysisResult, opts AnalysisOptions) error { 467 for _, a := range ag.postAnalyzers { 468 fsys, ok := compositeFS.Get(a.Type()) 469 if !ok { 470 continue 471 } 472 473 skippedFiles := result.SystemInstalledFiles 474 for _, app := range result.Applications { 475 skippedFiles = append(skippedFiles, app.FilePath) 476 for _, lib := range app.Libraries { 477 // The analysis result could contain packages listed in SBOM. 478 // The files of those packages don't have to be analyzed. 479 // This is especially helpful for expensive post-analyzers such as the JAR analyzer. 480 if lib.FilePath != "" { 481 skippedFiles = append(skippedFiles, lib.FilePath) 482 } 483 } 484 } 485 486 filteredFS, err := fsys.Filter(skippedFiles) 487 if err != nil { 488 return xerrors.Errorf("unable to filter filesystem: %w", err) 489 } 490 491 res, err := a.PostAnalyze(ctx, PostAnalysisInput{ 492 FS: filteredFS, 493 Options: opts, 494 }) 495 if err != nil { 496 return xerrors.Errorf("post analysis error: %w", err) 497 } 498 result.Merge(res) 499 } 500 return nil 501 } 502 503 // PostAnalyzerFS returns a composite filesystem that contains multiple filesystems for each post-analyzer 504 func (ag AnalyzerGroup) PostAnalyzerFS() (*CompositeFS, error) { 505 return NewCompositeFS(ag) 506 } 507 508 func (ag AnalyzerGroup) filePatternMatch(analyzerType Type, filePath string) bool { 509 for _, pattern := range ag.filePatterns[analyzerType] { 510 if pattern.MatchString(filePath) { 511 return true 512 } 513 } 514 return false 515 }