github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/fanal/secret/scanner.go (about)

     1  package secret
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"os"
     7  	"regexp"
     8  	"sort"
     9  	"strings"
    10  	"sync"
    11  
    12  	"github.com/samber/lo"
    13  	"golang.org/x/exp/slices"
    14  	"golang.org/x/xerrors"
    15  	"gopkg.in/yaml.v3"
    16  
    17  	"github.com/devseccon/trivy/pkg/fanal/log"
    18  	"github.com/devseccon/trivy/pkg/fanal/types"
    19  )
    20  
    21  var lineSep = []byte{'\n'}
    22  
// Scanner detects secrets in file content.
// It embeds *Global, so the rule set, allow rules and exclude block held by
// Global (and its Allow/AllowPath methods) are available directly on a Scanner.
type Scanner struct {
	*Global
}
    26  
// Config is the secret scanning configuration decoded from a YAML file.
type Config struct {
	// Enable only specified built-in rules. If only one ID is specified, all other rules are disabled.
	// All the built-in rules are enabled if this field is not specified. It doesn't affect custom rules.
	EnableBuiltinRuleIDs []string `yaml:"enable-builtin-rules"`

	// Disable rules. It is applied to enabled IDs.
	DisableRuleIDs []string `yaml:"disable-rules"`

	// Disable allow rules.
	DisableAllowRuleIDs []string `yaml:"disable-allow-rules"`

	// CustomRules and CustomAllowRules are user-defined rules added on top of the built-in ones.
	// ExcludeBlock lists regexes for blocks of content in which findings are ignored.
	CustomRules      []Rule       `yaml:"rules"`
	CustomAllowRules AllowRules   `yaml:"allow-rules"`
	ExcludeBlock     ExcludeBlock `yaml:"exclude-block"`
}
    42  
    43  type Global struct {
    44  	Rules        []Rule
    45  	AllowRules   AllowRules
    46  	ExcludeBlock ExcludeBlock
    47  }
    48  
    49  // Allow checks if the match is allowed
    50  func (g Global) Allow(match string) bool {
    51  	return g.AllowRules.Allow(match)
    52  }
    53  
    54  // AllowPath checks if the path is allowed
    55  func (g Global) AllowPath(path string) bool {
    56  	return g.AllowRules.AllowPath(path)
    57  }
    58  
    59  // Regexp adds unmarshalling from YAML for regexp.Regexp
    60  type Regexp struct {
    61  	*regexp.Regexp
    62  }
    63  
    64  func MustCompile(str string) *Regexp {
    65  	return &Regexp{regexp.MustCompile(str)}
    66  }
    67  
    68  // UnmarshalYAML unmarshals YAML into a regexp.Regexp
    69  func (r *Regexp) UnmarshalYAML(value *yaml.Node) error {
    70  	var v string
    71  	if err := value.Decode(&v); err != nil {
    72  		return err
    73  	}
    74  	regex, err := regexp.Compile(v)
    75  	if err != nil {
    76  		return xerrors.Errorf("regexp compile error: %w", err)
    77  	}
    78  
    79  	r.Regexp = regex
    80  	return nil
    81  }
    82  
// Rule describes a single secret detection rule.
//
// ID, Category, Title and Severity are copied into each finding the rule produces.
// Regex is the pattern searched for in file content; a rule without a Regex finds nothing.
// Keywords, when non-empty, must appear (case-insensitively) in the content for the rule to run.
// Path, when set, restricts the rule to file paths matching it.
// AllowRules and ExcludeBlock are the rule-local counterparts of the global settings.
// SecretGroupName, when non-empty, names the capture group in Regex whose span is
// reported instead of the whole match.
type Rule struct {
	ID              string                   `yaml:"id"`
	Category        types.SecretRuleCategory `yaml:"category"`
	Title           string                   `yaml:"title"`
	Severity        string                   `yaml:"severity"`
	Regex           *Regexp                  `yaml:"regex"`
	Keywords        []string                 `yaml:"keywords"`
	Path            *Regexp                  `yaml:"path"`
	AllowRules      AllowRules               `yaml:"allow-rules"`
	ExcludeBlock    ExcludeBlock             `yaml:"exclude-block"`
	SecretGroupName string                   `yaml:"secret-group-name"`
}
    95  
    96  func (s *Scanner) FindLocations(r Rule, content []byte) []Location {
    97  	if r.Regex == nil {
    98  		return nil
    99  	}
   100  
   101  	if r.SecretGroupName != "" {
   102  		return s.FindSubmatchLocations(r, content)
   103  	}
   104  
   105  	var locs []Location
   106  	indices := r.Regex.FindAllIndex(content, -1)
   107  	for _, index := range indices {
   108  		loc := Location{
   109  			Start: index[0],
   110  			End:   index[1],
   111  		}
   112  
   113  		if s.AllowLocation(r, content, loc) {
   114  			continue
   115  		}
   116  
   117  		locs = append(locs, loc)
   118  	}
   119  	return locs
   120  }
   121  
   122  func (s *Scanner) FindSubmatchLocations(r Rule, content []byte) []Location {
   123  	var submatchLocations []Location
   124  	matchsIndices := r.Regex.FindAllSubmatchIndex(content, -1)
   125  	for _, matchIndices := range matchsIndices {
   126  		matchLocation := Location{ // first two indexes are always start and end of the whole match
   127  			Start: matchIndices[0],
   128  			End:   matchIndices[1],
   129  		}
   130  
   131  		if s.AllowLocation(r, content, matchLocation) {
   132  			continue
   133  		}
   134  
   135  		matchSubgroupsLocations := r.getMatchSubgroupsLocations(matchIndices)
   136  		if len(matchSubgroupsLocations) > 0 {
   137  			submatchLocations = append(submatchLocations, matchSubgroupsLocations...)
   138  		}
   139  	}
   140  	return submatchLocations
   141  }
   142  
   143  func (s *Scanner) AllowLocation(r Rule, content []byte, loc Location) bool {
   144  	match := string(content[loc.Start:loc.End])
   145  	return s.Allow(match) || r.Allow(match)
   146  }
   147  
   148  func (r *Rule) getMatchSubgroupsLocations(matchLocs []int) []Location {
   149  	var locations []Location
   150  	for i, name := range r.Regex.SubexpNames() {
   151  		if name == r.SecretGroupName {
   152  			startLocIndex := 2 * i
   153  			endLocIndex := startLocIndex + 1
   154  			locations = append(locations, Location{Start: matchLocs[startLocIndex], End: matchLocs[endLocIndex]})
   155  		}
   156  	}
   157  	return locations
   158  }
   159  
   160  func (r *Rule) MatchPath(path string) bool {
   161  	return r.Path == nil || r.Path.MatchString(path)
   162  }
   163  
   164  func (r *Rule) MatchKeywords(content []byte) bool {
   165  	if len(r.Keywords) == 0 {
   166  		return true
   167  	}
   168  
   169  	for _, kw := range r.Keywords {
   170  		if bytes.Contains(bytes.ToLower(content), []byte(strings.ToLower(kw))) {
   171  			return true
   172  		}
   173  	}
   174  
   175  	return false
   176  }
   177  
   178  func (r *Rule) AllowPath(path string) bool {
   179  	return r.AllowRules.AllowPath(path)
   180  }
   181  
   182  func (r *Rule) Allow(match string) bool {
   183  	return r.AllowRules.Allow(match)
   184  }
   185  
// AllowRule describes an exception to secret detection.
// Regex, when set, is tested against matched text; Path, when set, is tested
// against file paths. Either may be left nil.
type AllowRule struct {
	ID          string  `yaml:"id"`
	Description string  `yaml:"description"`
	Regex       *Regexp `yaml:"regex"`
	Path        *Regexp `yaml:"path"`
}

// AllowRules is a list of allow rules; a value is allowed if any rule in the list accepts it.
type AllowRules []AllowRule
   194  
   195  func (rules AllowRules) AllowPath(path string) bool {
   196  	for _, rule := range rules {
   197  		if rule.Path != nil && rule.Path.MatchString(path) {
   198  			return true
   199  		}
   200  	}
   201  	return false
   202  }
   203  
   204  func (rules AllowRules) Allow(match string) bool {
   205  	for _, rule := range rules {
   206  		if rule.Regex != nil && rule.Regex.MatchString(match) {
   207  			return true
   208  		}
   209  	}
   210  	return false
   211  }
   212  
// ExcludeBlock lists regexes describing blocks of content to exclude.
// Findings that lie entirely inside a region matched by one of Regexes are skipped.
type ExcludeBlock struct {
	Description string    `yaml:"description"`
	Regexes     []*Regexp `yaml:"regexes"`
}
   217  
   218  type Location struct {
   219  	Start int
   220  	End   int
   221  }
   222  
   223  func (l Location) Match(loc Location) bool {
   224  	return l.Start <= loc.Start && loc.End <= l.End
   225  }
   226  
   227  type Blocks struct {
   228  	content []byte
   229  	regexes []*Regexp
   230  	locs    []Location
   231  	once    *sync.Once
   232  }
   233  
   234  func newBlocks(content []byte, regexes []*Regexp) Blocks {
   235  	return Blocks{
   236  		content: content,
   237  		regexes: regexes,
   238  		once:    new(sync.Once),
   239  	}
   240  }
   241  
   242  func (b *Blocks) Match(block Location) bool {
   243  	b.once.Do(b.find)
   244  	for _, loc := range b.locs {
   245  		if loc.Match(block) {
   246  			return true
   247  		}
   248  	}
   249  	return false
   250  }
   251  
   252  func (b *Blocks) find() {
   253  	for _, regex := range b.regexes {
   254  		results := regex.FindAllIndex(b.content, -1)
   255  		if len(results) == 0 {
   256  			continue
   257  		}
   258  		for _, r := range results {
   259  			b.locs = append(b.locs, Location{
   260  				Start: r[0],
   261  				End:   r[1],
   262  			})
   263  		}
   264  	}
   265  }
   266  
   267  func ParseConfig(configPath string) (*Config, error) {
   268  	// If no config is passed, use built-in rules and allow rules.
   269  	if configPath == "" {
   270  		return nil, nil
   271  	}
   272  
   273  	f, err := os.Open(configPath)
   274  	if errors.Is(err, os.ErrNotExist) {
   275  		// If the specified file doesn't exist, it just uses built-in rules and allow rules.
   276  		log.Logger.Debugf("No secret config detected: %s", configPath)
   277  		return nil, nil
   278  	} else if err != nil {
   279  		return nil, xerrors.Errorf("file open error %s: %w", configPath, err)
   280  	}
   281  	defer f.Close()
   282  
   283  	log.Logger.Infof("Loading %s for secret scanning...", configPath)
   284  
   285  	var config Config
   286  	if err = yaml.NewDecoder(f).Decode(&config); err != nil {
   287  		return nil, xerrors.Errorf("secrets config decode error: %w", err)
   288  	}
   289  
   290  	return &config, nil
   291  }
   292  
   293  func NewScanner(config *Config) Scanner {
   294  	// Use the default rules
   295  	if config == nil {
   296  		return Scanner{Global: &Global{
   297  			Rules:      builtinRules,
   298  			AllowRules: builtinAllowRules,
   299  		}}
   300  	}
   301  
   302  	enabledRules := builtinRules
   303  	if len(config.EnableBuiltinRuleIDs) != 0 {
   304  		// Enable only specified built-in rules
   305  		enabledRules = lo.Filter(builtinRules, func(v Rule, _ int) bool {
   306  			return slices.Contains(config.EnableBuiltinRuleIDs, v.ID)
   307  		})
   308  	}
   309  
   310  	// Custom rules are enabled regardless of "enable-builtin-rules".
   311  	enabledRules = append(enabledRules, config.CustomRules...)
   312  
   313  	// Disable specified rules
   314  	rules := lo.Filter(enabledRules, func(v Rule, _ int) bool {
   315  		return !slices.Contains(config.DisableRuleIDs, v.ID)
   316  	})
   317  
   318  	// Disable specified allow rules
   319  	allowRules := append(builtinAllowRules, config.CustomAllowRules...)
   320  	allowRules = lo.Filter(allowRules, func(v AllowRule, _ int) bool {
   321  		return !slices.Contains(config.DisableAllowRuleIDs, v.ID)
   322  	})
   323  
   324  	return Scanner{Global: &Global{
   325  		Rules:        rules,
   326  		AllowRules:   allowRules,
   327  		ExcludeBlock: config.ExcludeBlock,
   328  	}}
   329  }
   330  
// ScanArgs is the input to Scanner.Scan: the path of the file, which is checked
// against path rules and reported in the result, and the file content to search.
type ScanArgs struct {
	FilePath string
	Content  []byte
}
   335  
// Match pairs a detected secret location with the rule that detected it.
type Match struct {
	Rule     Rule
	Location Location
}
   340  
   341  func (s *Scanner) Scan(args ScanArgs) types.Secret {
   342  	// Global allowed paths
   343  	if s.AllowPath(args.FilePath) {
   344  		log.Logger.Debugf("Skipped secret scanning on %q matching allowed paths", args.FilePath)
   345  		return types.Secret{
   346  			FilePath: args.FilePath,
   347  		}
   348  	}
   349  
   350  	var censored []byte
   351  	var copyCensored sync.Once
   352  	var matched []Match
   353  
   354  	var findings []types.SecretFinding
   355  	globalExcludedBlocks := newBlocks(args.Content, s.ExcludeBlock.Regexes)
   356  	for _, rule := range s.Rules {
   357  		// Check if the file path should be scanned by this rule
   358  		if !rule.MatchPath(args.FilePath) {
   359  			log.Logger.Debugf("Skipped secret scanning on %q as non-compliant to the rule %q", args.FilePath, rule.ID)
   360  			continue
   361  		}
   362  
   363  		// Check if the file path should be allowed
   364  		if rule.AllowPath(args.FilePath) {
   365  			log.Logger.Debugf("Skipped secret scanning on %q as allowed", args.FilePath)
   366  			continue
   367  		}
   368  
   369  		// Check if the file content contains keywords and should be scanned
   370  		if !rule.MatchKeywords(args.Content) {
   371  			continue
   372  		}
   373  
   374  		// Detect secrets
   375  		locs := s.FindLocations(rule, args.Content)
   376  		if len(locs) == 0 {
   377  			continue
   378  		}
   379  
   380  		localExcludedBlocks := newBlocks(args.Content, rule.ExcludeBlock.Regexes)
   381  
   382  		for _, loc := range locs {
   383  			// Skip the secret if it is within excluded blocks.
   384  			if globalExcludedBlocks.Match(loc) || localExcludedBlocks.Match(loc) {
   385  				continue
   386  			}
   387  
   388  			matched = append(matched, Match{
   389  				Rule:     rule,
   390  				Location: loc,
   391  			})
   392  			copyCensored.Do(func() {
   393  				censored = make([]byte, len(args.Content))
   394  				copy(censored, args.Content)
   395  			})
   396  			censored = censorLocation(loc, censored)
   397  		}
   398  	}
   399  
   400  	for _, match := range matched {
   401  		findings = append(findings, toFinding(match.Rule, match.Location, censored))
   402  	}
   403  
   404  	if len(findings) == 0 {
   405  		return types.Secret{}
   406  	}
   407  
   408  	sort.Slice(findings, func(i, j int) bool {
   409  		if findings[i].RuleID != findings[j].RuleID {
   410  			return findings[i].RuleID < findings[j].RuleID
   411  		}
   412  		return findings[i].Match < findings[j].Match
   413  	})
   414  
   415  	return types.Secret{
   416  		FilePath: args.FilePath,
   417  		Findings: findings,
   418  	}
   419  }
   420  
   421  func censorLocation(loc Location, input []byte) []byte {
   422  	return append(
   423  		input[:loc.Start],
   424  		append(
   425  			bytes.Repeat([]byte("*"), loc.End-loc.Start),
   426  			input[loc.End:]...,
   427  		)...,
   428  	)
   429  }
   430  
   431  func toFinding(rule Rule, loc Location, content []byte) types.SecretFinding {
   432  	startLine, endLine, code, matchLine := findLocation(loc.Start, loc.End, content)
   433  
   434  	return types.SecretFinding{
   435  		RuleID:    rule.ID,
   436  		Category:  rule.Category,
   437  		Severity:  lo.Ternary(rule.Severity == "", "UNKNOWN", rule.Severity),
   438  		Title:     rule.Title,
   439  		Match:     matchLine,
   440  		StartLine: startLine,
   441  		EndLine:   endLine,
   442  		Code:      code,
   443  	}
   444  }
   445  
   446  const secretHighlightRadius = 2 // number of lines above + below each secret to include in code output
   447  
   448  func findLocation(start, end int, content []byte) (int, int, types.Code, string) {
   449  	startLineNum := bytes.Count(content[:start], lineSep)
   450  
   451  	lineStart := bytes.LastIndex(content[:start], lineSep)
   452  	if lineStart == -1 {
   453  		lineStart = 0
   454  	} else {
   455  		lineStart += 1
   456  	}
   457  
   458  	lineEnd := bytes.Index(content[start:], lineSep)
   459  	if lineEnd == -1 {
   460  		lineEnd = len(content)
   461  	} else {
   462  		lineEnd += start
   463  	}
   464  
   465  	if lineEnd-lineStart > 100 {
   466  		lineStart = lo.Ternary(start-30 < 0, 0, start-30)
   467  		lineEnd = lo.Ternary(end+20 > len(content), len(content), end+20)
   468  	}
   469  	matchLine := string(content[lineStart:lineEnd])
   470  	endLineNum := startLineNum + bytes.Count(content[start:end], lineSep)
   471  
   472  	var code types.Code
   473  
   474  	lines := bytes.Split(content, lineSep)
   475  	codeStart := lo.Ternary(startLineNum-secretHighlightRadius < 0, 0, startLineNum-secretHighlightRadius)
   476  	codeEnd := lo.Ternary(endLineNum+secretHighlightRadius > len(lines), len(lines), endLineNum+secretHighlightRadius)
   477  
   478  	rawLines := lines[codeStart:codeEnd]
   479  	var foundFirst bool
   480  	for i, rawLine := range rawLines {
   481  		strRawLine := string(rawLine)
   482  		realLine := codeStart + i
   483  		inCause := realLine >= startLineNum && realLine <= endLineNum
   484  		code.Lines = append(code.Lines, types.Line{
   485  			Number:      codeStart + i + 1,
   486  			Content:     strRawLine,
   487  			IsCause:     inCause,
   488  			Highlighted: strRawLine,
   489  			FirstCause:  !foundFirst && inCause,
   490  			LastCause:   false,
   491  		})
   492  		foundFirst = foundFirst || inCause
   493  	}
   494  	if len(code.Lines) > 0 {
   495  		for i := len(code.Lines) - 1; i >= 0; i-- {
   496  			if code.Lines[i].IsCause {
   497  				code.Lines[i].LastCause = true
   498  				break
   499  			}
   500  		}
   501  	}
   502  
   503  	return startLineNum + 1, endLineNum + 1, code, matchLine
   504  }