github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/fanal/analyzer/analyzer.go (about)

     1  package analyzer
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"io/fs"
     7  	"os"
     8  	"regexp"
     9  	"sort"
    10  	"strings"
    11  	"sync"
    12  
    13  	"github.com/samber/lo"
    14  	"golang.org/x/exp/slices"
    15  	"golang.org/x/sync/semaphore"
    16  	"golang.org/x/xerrors"
    17  
    18  	dio "github.com/aquasecurity/go-dep-parser/pkg/io"
    19  	fos "github.com/devseccon/trivy/pkg/fanal/analyzer/os"
    20  	"github.com/devseccon/trivy/pkg/fanal/log"
    21  	"github.com/devseccon/trivy/pkg/fanal/types"
    22  	"github.com/devseccon/trivy/pkg/misconf"
    23  )
    24  
var (
	// analyzers and postAnalyzers are the package-level registries filled by
	// RegisterAnalyzer and RegisterPostAnalyzer, keyed by analyzer type.
	analyzers     = make(map[Type]analyzer)
	postAnalyzers = make(map[Type]postAnalyzerInitialize)

	// ErrUnknownOS occurs when an unknown OS is analyzed.
	ErrUnknownOS = xerrors.New("unknown OS")
	// ErrPkgAnalysis occurs when the analysis of packages fails.
	ErrPkgAnalysis = xerrors.New("failed to analyze packages")
	// ErrNoPkgsDetected occurs when the required files for an OS package manager are not detected.
	ErrNoPkgsDetected = xerrors.New("no packages detected")
)
    36  
    37  //////////////////////
    38  // Analyzer options //
    39  //////////////////////
    40  
// AnalyzerOptions is used to initialize analyzers.
// It is consumed by NewAnalyzerGroup and handed to every Initializer and
// post-analyzer constructor.
type AnalyzerOptions struct {
	// Group selects the analyzer group to build; an empty value means GroupBuiltin.
	Group Group
	// Parallel is not read in this file — presumably the degree of parallelism; confirm with callers.
	Parallel int
	// FilePatterns holds entries of the form "fileType:regexPattern",
	// e.g. "dockerfile:my_dockerfile_*".
	FilePatterns []string
	// DisabledAnalyzers lists analyzer types that must be left out of the group.
	DisabledAnalyzers []Type
	// The scanner options below are not read in this file; presumably the
	// individual analyzers pick them up during initialization — confirm.
	MisconfScannerOption misconf.ScannerOption
	SecretScannerOption  SecretScannerOption
	LicenseScannerOption LicenseScannerOption
}
    51  
// SecretScannerOption carries the settings for secret scanning.
type SecretScannerOption struct {
	// ConfigPath is presumably the path to the secret scanner configuration
	// file; it is not read in this file — confirm with the secret analyzer.
	ConfigPath string
}
    55  
// LicenseScannerOption carries the settings for license scanning.
type LicenseScannerOption struct {
	// Use license classifier to get better results though the classification is expensive.
	Full bool
	// ClassifierConfidenceLevel is not read in this file; presumably the
	// confidence threshold applied by the license classifier — confirm.
	ClassifierConfidenceLevel float64
}
    61  
    62  ////////////////
    63  // Interfaces //
    64  ////////////////
    65  
// Initializer represents analyzers that need to take parameters from users.
// NewAnalyzerGroup calls Init on every selected analyzer that implements it
// and aborts group creation when Init returns an error.
type Initializer interface {
	Init(AnalyzerOptions) error
}
    70  
// analyzer is the contract of a per-file analyzer driven by AnalyzerGroup.AnalyzeFile.
type analyzer interface {
	// Type identifies the analyzer; it is the key used in the registry.
	Type() Type
	// Version is the value reported by AnalyzerGroup.AnalyzerVersions.
	Version() int
	// Analyze inspects a single file and returns what was found in it.
	Analyze(ctx context.Context, input AnalysisInput) (*AnalysisResult, error)
	// Required reports whether the analyzer wants to see the given file.
	Required(filePath string, info os.FileInfo) bool
}
    77  
// PostAnalyzer is the contract of an analyzer driven by AnalyzerGroup.PostAnalyze.
// Instead of a single file it receives a filesystem holding the files it required.
type PostAnalyzer interface {
	// Type identifies the post-analyzer; it is the key used in the registry.
	Type() Type
	// Version is the value reported by AnalyzerGroup.AnalyzerVersions.
	Version() int
	// PostAnalyze inspects the given filesystem and returns what was found in it.
	PostAnalyze(ctx context.Context, input PostAnalysisInput) (*AnalysisResult, error)
	// Required reports whether the post-analyzer wants to see the given file.
	Required(filePath string, info os.FileInfo) bool
}
    84  
    85  ////////////////////
    86  // Analyzer group //
    87  ////////////////////
    88  
// Group is the name of a set of analyzers; NewAnalyzerGroup only selects
// analyzers whose group matches the requested one.
type Group string

// GroupBuiltin is the group assumed for analyzers that do not implement
// CustomGroup, and the default when AnalyzerOptions.Group is empty.
const GroupBuiltin Group = "builtin"
    92  
    93  func RegisterAnalyzer(analyzer analyzer) {
    94  	if _, ok := analyzers[analyzer.Type()]; ok {
    95  		log.Logger.Fatalf("analyzer %s is registered twice", analyzer.Type())
    96  	}
    97  	analyzers[analyzer.Type()] = analyzer
    98  }
    99  
// postAnalyzerInitialize constructs a PostAnalyzer from the user-supplied options.
// NewAnalyzerGroup invokes the registered constructors when building a group.
type postAnalyzerInitialize func(options AnalyzerOptions) (PostAnalyzer, error)
   101  
   102  func RegisterPostAnalyzer(t Type, initializer postAnalyzerInitialize) {
   103  	if _, ok := postAnalyzers[t]; ok {
   104  		log.Logger.Fatalf("analyzer %s is registered twice", t)
   105  	}
   106  	postAnalyzers[t] = initializer
   107  }
   108  
// DeregisterAnalyzer removes the analyzer of type t from the package-level
// registry; removing a type that is not registered is a no-op.
// It is mainly for testing.
func DeregisterAnalyzer(t Type) {
	delete(analyzers, t)
}
   113  
// CustomGroup is implemented by custom analyzers to report the group they
// belong to. Analyzers that do not implement it are treated as GroupBuiltin.
// This is mainly intended to be used in DevSecCon products.
type CustomGroup interface {
	Group() Group
}
   119  
// Opener opens the file under analysis on demand. AnalyzeFile calls it once
// per analyzer that requires the file and closes the returned handle when
// that analyzer has finished.
type Opener func() (dio.ReadSeekCloserAt, error)
   121  
// AnalyzerGroup is the set of analyzers and post-analyzers selected by
// NewAnalyzerGroup, together with the user-supplied file patterns.
type AnalyzerGroup struct {
	analyzers     []analyzer
	postAnalyzers []PostAnalyzer
	// filePatterns maps an analyzer type to the regular expressions that
	// force a file to be handed to that analyzer.
	filePatterns map[Type][]*regexp.Regexp
}
   127  
   128  ///////////////////////////
   129  // Analyzer input/output //
   130  ///////////////////////////
   131  
// AnalysisInput is what AnalyzeFile passes to an analyzer for a single file.
type AnalysisInput struct {
	// Dir and FilePath are forwarded unchanged from the AnalyzeFile arguments.
	Dir      string
	FilePath string
	// Info is the file information of the file under analysis.
	Info os.FileInfo
	// Content is the opened file; AnalyzeFile closes it after Analyze returns.
	Content dio.ReadSeekerAt

	Options AnalysisOptions
}
   140  
// PostAnalysisInput is what PostAnalyze passes to a post-analyzer.
type PostAnalysisInput struct {
	// FS is the post-analyzer's filesystem after already-covered files were filtered out.
	FS      fs.FS
	Options AnalysisOptions
}
   145  
// AnalysisOptions are per-scan switches forwarded to every analyzer and post-analyzer.
// Neither field is read in this file; the meanings below are presumed from
// the names — confirm against the analyzers that consume them.
type AnalysisOptions struct {
	// Offline presumably forbids network access during analysis.
	Offline bool
	// FileChecksum presumably requests checksums of the analyzed files.
	FileChecksum bool
}
   150  
// AnalysisResult accumulates the findings of all analyzers.
// Results from individual analyzers are folded in through Merge.
type AnalysisResult struct {
	// m serializes Merge, which is called from multiple goroutines.
	m                    sync.Mutex
	OS                   types.OS
	Repository           *types.Repository
	PackageInfos         []types.PackageInfo
	Applications         []types.Application
	Misconfigurations    []types.Misconfiguration
	Secrets              []types.Secret
	Licenses             []types.LicenseFile
	SystemInstalledFiles []string // A list of files installed by OS package manager

	// Digests contains SHA-256 digests of unpackaged files
	// used to search for SBOM attestation.
	Digests map[string]string

	// For Red Hat
	BuildInfo *types.BuildInfo

	// CustomResources hold analysis results from custom analyzers.
	// It is for extensibility and not used in OSS.
	CustomResources []types.CustomResource
}
   173  
   174  func NewAnalysisResult() *AnalysisResult {
   175  	result := new(AnalysisResult)
   176  	return result
   177  }
   178  
   179  func (r *AnalysisResult) isEmpty() bool {
   180  	return lo.IsEmpty(r.OS) && r.Repository == nil && len(r.PackageInfos) == 0 && len(r.Applications) == 0 &&
   181  		len(r.Misconfigurations) == 0 && len(r.Secrets) == 0 && len(r.Licenses) == 0 && len(r.SystemInstalledFiles) == 0 &&
   182  		r.BuildInfo == nil && len(r.Digests) == 0 && len(r.CustomResources) == 0
   183  }
   184  
   185  func (r *AnalysisResult) Sort() {
   186  	// OS packages
   187  	sort.Slice(r.PackageInfos, func(i, j int) bool {
   188  		return r.PackageInfos[i].FilePath < r.PackageInfos[j].FilePath
   189  	})
   190  
   191  	for _, pi := range r.PackageInfos {
   192  		sort.Sort(pi.Packages)
   193  	}
   194  
   195  	// Language-specific packages
   196  	sort.Slice(r.Applications, func(i, j int) bool {
   197  		if r.Applications[i].FilePath != r.Applications[j].FilePath {
   198  			return r.Applications[i].FilePath < r.Applications[j].FilePath
   199  		}
   200  		return r.Applications[i].Type < r.Applications[j].Type
   201  	})
   202  
   203  	for _, app := range r.Applications {
   204  		sort.Sort(app.Libraries)
   205  	}
   206  
   207  	// Custom resources
   208  	sort.Slice(r.CustomResources, func(i, j int) bool {
   209  		return r.CustomResources[i].FilePath < r.CustomResources[j].FilePath
   210  	})
   211  
   212  	// Misconfigurations
   213  	sort.Slice(r.Misconfigurations, func(i, j int) bool {
   214  		return r.Misconfigurations[i].FilePath < r.Misconfigurations[j].FilePath
   215  	})
   216  
   217  	// Secrets
   218  	sort.Slice(r.Secrets, func(i, j int) bool {
   219  		return r.Secrets[i].FilePath < r.Secrets[j].FilePath
   220  	})
   221  	for _, sec := range r.Secrets {
   222  		sort.Slice(sec.Findings, func(i, j int) bool {
   223  			if sec.Findings[i].RuleID != sec.Findings[j].RuleID {
   224  				return sec.Findings[i].RuleID < sec.Findings[j].RuleID
   225  			}
   226  			return sec.Findings[i].StartLine < sec.Findings[j].StartLine
   227  		})
   228  	}
   229  
   230  	// License files
   231  	sort.Slice(r.Licenses, func(i, j int) bool {
   232  		if r.Licenses[i].Type == r.Licenses[j].Type {
   233  			if r.Licenses[i].FilePath == r.Licenses[j].FilePath {
   234  				return r.Licenses[i].Layer.DiffID < r.Licenses[j].Layer.DiffID
   235  			} else {
   236  				return r.Licenses[i].FilePath < r.Licenses[j].FilePath
   237  			}
   238  		}
   239  
   240  		return r.Licenses[i].Type < r.Licenses[j].Type
   241  	})
   242  }
   243  
   244  func (r *AnalysisResult) Merge(newResult *AnalysisResult) {
   245  	if newResult == nil || newResult.isEmpty() {
   246  		return
   247  	}
   248  
   249  	// this struct is accessed by multiple goroutines
   250  	r.m.Lock()
   251  	defer r.m.Unlock()
   252  
   253  	r.OS.Merge(newResult.OS)
   254  
   255  	if newResult.Repository != nil {
   256  		r.Repository = newResult.Repository
   257  	}
   258  
   259  	if len(newResult.PackageInfos) > 0 {
   260  		r.PackageInfos = append(r.PackageInfos, newResult.PackageInfos...)
   261  	}
   262  
   263  	if len(newResult.Applications) > 0 {
   264  		r.Applications = append(r.Applications, newResult.Applications...)
   265  	}
   266  
   267  	// Merge SHA-256 digests of unpackaged files
   268  	if newResult.Digests != nil {
   269  		r.Digests = lo.Assign(r.Digests, newResult.Digests)
   270  	}
   271  
   272  	r.Misconfigurations = append(r.Misconfigurations, newResult.Misconfigurations...)
   273  	r.Secrets = append(r.Secrets, newResult.Secrets...)
   274  	r.Licenses = append(r.Licenses, newResult.Licenses...)
   275  	r.SystemInstalledFiles = append(r.SystemInstalledFiles, newResult.SystemInstalledFiles...)
   276  
   277  	if newResult.BuildInfo != nil {
   278  		if r.BuildInfo == nil {
   279  			r.BuildInfo = newResult.BuildInfo
   280  		} else {
   281  			// We don't need to merge build info here
   282  			// because there is theoretically only one file about build info in each layer.
   283  			if newResult.BuildInfo.Nvr != "" || newResult.BuildInfo.Arch != "" {
   284  				r.BuildInfo.Nvr = newResult.BuildInfo.Nvr
   285  				r.BuildInfo.Arch = newResult.BuildInfo.Arch
   286  			}
   287  			if len(newResult.BuildInfo.ContentSets) > 0 {
   288  				r.BuildInfo.ContentSets = newResult.BuildInfo.ContentSets
   289  			}
   290  		}
   291  	}
   292  
   293  	r.CustomResources = append(r.CustomResources, newResult.CustomResources...)
   294  }
   295  
   296  func belongToGroup(groupName Group, analyzerType Type, disabledAnalyzers []Type, analyzer any) bool {
   297  	if slices.Contains(disabledAnalyzers, analyzerType) {
   298  		return false
   299  	}
   300  
   301  	analyzerGroupName := GroupBuiltin
   302  	if cg, ok := analyzer.(CustomGroup); ok {
   303  		analyzerGroupName = cg.Group()
   304  	}
   305  	if analyzerGroupName != groupName {
   306  		return false
   307  	}
   308  
   309  	return true
   310  }
   311  
// separator splits a file pattern into its analyzer type and its regular
// expression, e.g. "dockerfile:my_dockerfile_*".
const separator = ":"
   313  
   314  func NewAnalyzerGroup(opt AnalyzerOptions) (AnalyzerGroup, error) {
   315  	groupName := opt.Group
   316  	if groupName == "" {
   317  		groupName = GroupBuiltin
   318  	}
   319  
   320  	group := AnalyzerGroup{
   321  		filePatterns: make(map[Type][]*regexp.Regexp),
   322  	}
   323  	for _, p := range opt.FilePatterns {
   324  		// e.g. "dockerfile:my_dockerfile_*"
   325  		s := strings.SplitN(p, separator, 2)
   326  		if len(s) != 2 {
   327  			return group, xerrors.Errorf("invalid file pattern (%s) expected format: \"fileType:regexPattern\" e.g. \"dockerfile:my_dockerfile_*\"", p)
   328  		}
   329  
   330  		fileType, pattern := s[0], s[1]
   331  		r, err := regexp.Compile(pattern)
   332  		if err != nil {
   333  			return group, xerrors.Errorf("invalid file regexp (%s): %w", p, err)
   334  		}
   335  
   336  		if _, ok := group.filePatterns[Type(fileType)]; !ok {
   337  			group.filePatterns[Type(fileType)] = []*regexp.Regexp{}
   338  		}
   339  
   340  		group.filePatterns[Type(fileType)] = append(group.filePatterns[Type(fileType)], r)
   341  	}
   342  
   343  	for analyzerType, a := range analyzers {
   344  		if !belongToGroup(groupName, analyzerType, opt.DisabledAnalyzers, a) {
   345  			continue
   346  		}
   347  		// Initialize only scanners that have Init()
   348  		if ini, ok := a.(Initializer); ok {
   349  			if err := ini.Init(opt); err != nil {
   350  				return AnalyzerGroup{}, xerrors.Errorf("analyzer initialization error: %w", err)
   351  			}
   352  		}
   353  		group.analyzers = append(group.analyzers, a)
   354  	}
   355  
   356  	for analyzerType, init := range postAnalyzers {
   357  		a, err := init(opt)
   358  		if err != nil {
   359  			return AnalyzerGroup{}, xerrors.Errorf("post-analyzer init error: %w", err)
   360  		}
   361  		if !belongToGroup(groupName, analyzerType, opt.DisabledAnalyzers, a) {
   362  			continue
   363  		}
   364  		group.postAnalyzers = append(group.postAnalyzers, a)
   365  	}
   366  
   367  	return group, nil
   368  }
   369  
// Versions maps analyzer and post-analyzer type names to their versions,
// as returned by AnalyzerGroup.AnalyzerVersions.
type Versions struct {
	Analyzers     map[string]int
	PostAnalyzers map[string]int
}
   374  
   375  // AnalyzerVersions returns analyzer version identifier used for cache keys.
   376  func (ag AnalyzerGroup) AnalyzerVersions() Versions {
   377  	analyzerVersions := make(map[string]int)
   378  	for _, a := range ag.analyzers {
   379  		analyzerVersions[string(a.Type())] = a.Version()
   380  	}
   381  	postAnalyzerVersions := make(map[string]int)
   382  	for _, a := range ag.postAnalyzers {
   383  		postAnalyzerVersions[string(a.Type())] = a.Version()
   384  	}
   385  	return Versions{
   386  		Analyzers:     analyzerVersions,
   387  		PostAnalyzers: postAnalyzerVersions,
   388  	}
   389  }
   390  
   391  // AnalyzeFile determines which files are required by the analyzers based on the file name and attributes,
   392  // and passes only those files to the analyzer for analysis.
   393  // This function may be called concurrently and must be thread-safe.
   394  func (ag AnalyzerGroup) AnalyzeFile(ctx context.Context, wg *sync.WaitGroup, limit *semaphore.Weighted, result *AnalysisResult,
   395  	dir, filePath string, info os.FileInfo, opener Opener, disabled []Type, opts AnalysisOptions) error {
   396  	if info.IsDir() {
   397  		return nil
   398  	}
   399  
   400  	// filepath extracted from tar file doesn't have the prefix "/"
   401  	cleanPath := strings.TrimLeft(filePath, "/")
   402  
   403  	for _, a := range ag.analyzers {
   404  		// Skip disabled analyzers
   405  		if slices.Contains(disabled, a.Type()) {
   406  			continue
   407  		}
   408  
   409  		if !ag.filePatternMatch(a.Type(), cleanPath) && !a.Required(cleanPath, info) {
   410  			continue
   411  		}
   412  		rc, err := opener()
   413  		if errors.Is(err, fs.ErrPermission) {
   414  			log.Logger.Debugf("Permission error: %s", filePath)
   415  			break
   416  		} else if err != nil {
   417  			return xerrors.Errorf("unable to open %s: %w", filePath, err)
   418  		}
   419  
   420  		if err = limit.Acquire(ctx, 1); err != nil {
   421  			return xerrors.Errorf("semaphore acquire: %w", err)
   422  		}
   423  		wg.Add(1)
   424  
   425  		go func(a analyzer, rc dio.ReadSeekCloserAt) {
   426  			defer limit.Release(1)
   427  			defer wg.Done()
   428  			defer rc.Close()
   429  
   430  			ret, err := a.Analyze(ctx, AnalysisInput{
   431  				Dir:      dir,
   432  				FilePath: filePath,
   433  				Info:     info,
   434  				Content:  rc,
   435  				Options:  opts,
   436  			})
   437  			if err != nil && !errors.Is(err, fos.AnalyzeOSError) {
   438  				log.Logger.Debugf("Analysis error: %s", err)
   439  				return
   440  			}
   441  			result.Merge(ret)
   442  		}(a, rc)
   443  	}
   444  
   445  	return nil
   446  }
   447  
   448  // RequiredPostAnalyzers returns a list of analyzer types that require the given file.
   449  func (ag AnalyzerGroup) RequiredPostAnalyzers(filePath string, info os.FileInfo) []Type {
   450  	if info.IsDir() {
   451  		return nil
   452  	}
   453  	var postAnalyzerTypes []Type
   454  	for _, a := range ag.postAnalyzers {
   455  		if ag.filePatternMatch(a.Type(), filePath) || a.Required(filePath, info) {
   456  			postAnalyzerTypes = append(postAnalyzerTypes, a.Type())
   457  		}
   458  	}
   459  	return postAnalyzerTypes
   460  }
   461  
   462  // PostAnalyze passes a virtual filesystem containing only required files
   463  // and passes it to the respective post-analyzer.
   464  // The obtained results are merged into the "result".
   465  // This function may be called concurrently and must be thread-safe.
   466  func (ag AnalyzerGroup) PostAnalyze(ctx context.Context, compositeFS *CompositeFS, result *AnalysisResult, opts AnalysisOptions) error {
   467  	for _, a := range ag.postAnalyzers {
   468  		fsys, ok := compositeFS.Get(a.Type())
   469  		if !ok {
   470  			continue
   471  		}
   472  
   473  		skippedFiles := result.SystemInstalledFiles
   474  		for _, app := range result.Applications {
   475  			skippedFiles = append(skippedFiles, app.FilePath)
   476  			for _, lib := range app.Libraries {
   477  				// The analysis result could contain packages listed in SBOM.
   478  				// The files of those packages don't have to be analyzed.
   479  				// This is especially helpful for expensive post-analyzers such as the JAR analyzer.
   480  				if lib.FilePath != "" {
   481  					skippedFiles = append(skippedFiles, lib.FilePath)
   482  				}
   483  			}
   484  		}
   485  
   486  		filteredFS, err := fsys.Filter(skippedFiles)
   487  		if err != nil {
   488  			return xerrors.Errorf("unable to filter filesystem: %w", err)
   489  		}
   490  
   491  		res, err := a.PostAnalyze(ctx, PostAnalysisInput{
   492  			FS:      filteredFS,
   493  			Options: opts,
   494  		})
   495  		if err != nil {
   496  			return xerrors.Errorf("post analysis error: %w", err)
   497  		}
   498  		result.Merge(res)
   499  	}
   500  	return nil
   501  }
   502  
// PostAnalyzerFS returns a composite filesystem that contains multiple filesystems for each post-analyzer.
// It delegates to NewCompositeFS with this group and returns its result and error unchanged.
func (ag AnalyzerGroup) PostAnalyzerFS() (*CompositeFS, error) {
	return NewCompositeFS(ag)
}
   507  
   508  func (ag AnalyzerGroup) filePatternMatch(analyzerType Type, filePath string) bool {
   509  	for _, pattern := range ag.filePatterns[analyzerType] {
   510  		if pattern.MatchString(filePath) {
   511  			return true
   512  		}
   513  	}
   514  	return false
   515  }