github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/fanal/secret/scanner.go (about) 1 package secret 2 3 import ( 4 "bytes" 5 "errors" 6 "os" 7 "regexp" 8 "sort" 9 "strings" 10 "sync" 11 12 "github.com/samber/lo" 13 "golang.org/x/exp/slices" 14 "golang.org/x/xerrors" 15 "gopkg.in/yaml.v3" 16 17 "github.com/devseccon/trivy/pkg/fanal/log" 18 "github.com/devseccon/trivy/pkg/fanal/types" 19 ) 20 21 var lineSep = []byte{'\n'} 22 23 type Scanner struct { 24 *Global 25 } 26 27 type Config struct { 28 // Enable only specified built-in rules. If only one ID is specified, all other rules are disabled. 29 // All the built-in rules are enabled if this field is not specified. It doesn't affect custom rules. 30 EnableBuiltinRuleIDs []string `yaml:"enable-builtin-rules"` 31 32 // Disable rules. It is applied to enabled IDs. 33 DisableRuleIDs []string `yaml:"disable-rules"` 34 35 // Disable allow rules. 36 DisableAllowRuleIDs []string `yaml:"disable-allow-rules"` 37 38 CustomRules []Rule `yaml:"rules"` 39 CustomAllowRules AllowRules `yaml:"allow-rules"` 40 ExcludeBlock ExcludeBlock `yaml:"exclude-block"` 41 } 42 43 type Global struct { 44 Rules []Rule 45 AllowRules AllowRules 46 ExcludeBlock ExcludeBlock 47 } 48 49 // Allow checks if the match is allowed 50 func (g Global) Allow(match string) bool { 51 return g.AllowRules.Allow(match) 52 } 53 54 // AllowPath checks if the path is allowed 55 func (g Global) AllowPath(path string) bool { 56 return g.AllowRules.AllowPath(path) 57 } 58 59 // Regexp adds unmarshalling from YAML for regexp.Regexp 60 type Regexp struct { 61 *regexp.Regexp 62 } 63 64 func MustCompile(str string) *Regexp { 65 return &Regexp{regexp.MustCompile(str)} 66 } 67 68 // UnmarshalYAML unmarshals YAML into a regexp.Regexp 69 func (r *Regexp) UnmarshalYAML(value *yaml.Node) error { 70 var v string 71 if err := value.Decode(&v); err != nil { 72 return err 73 } 74 regex, err := regexp.Compile(v) 75 if err != nil { 76 return xerrors.Errorf("regexp compile error: %w", err) 77 } 78 79 r.Regexp = regex 80 return nil 81 } 82 83 type Rule struct { 84 ID string `yaml:"id"` 85 Category types.SecretRuleCategory `yaml:"category"` 86 Title string `yaml:"title"` 87 Severity string `yaml:"severity"` 88 Regex *Regexp `yaml:"regex"` 89 Keywords []string `yaml:"keywords"` 90 Path *Regexp `yaml:"path"` 91 AllowRules AllowRules `yaml:"allow-rules"` 92 ExcludeBlock ExcludeBlock `yaml:"exclude-block"` 93 SecretGroupName string `yaml:"secret-group-name"` 94 } 95 96 func (s *Scanner) FindLocations(r Rule, content []byte) []Location { 97 if r.Regex == nil { 98 return nil 99 } 100 101 if r.SecretGroupName != "" { 102 return s.FindSubmatchLocations(r, content) 103 } 104 105 var locs []Location 106 indices := r.Regex.FindAllIndex(content, -1) 107 for _, index := range indices { 108 loc := Location{ 109 Start: index[0], 110 End: index[1], 111 } 112 113 if s.AllowLocation(r, content, loc) { 114 continue 115 } 116 117 locs = append(locs, loc) 118 } 119 return locs 120 } 121 122 func (s *Scanner) FindSubmatchLocations(r Rule, content []byte) []Location { 123 var submatchLocations []Location 124 matchsIndices := r.Regex.FindAllSubmatchIndex(content, -1) 125 for _, matchIndices := range matchsIndices { 126 matchLocation := Location{ // first two indexes are always start and end of the whole match 127 Start: matchIndices[0], 128 End: matchIndices[1], 129 } 130 131 if s.AllowLocation(r, content, matchLocation) { 132 continue 133 } 134 135 matchSubgroupsLocations := r.getMatchSubgroupsLocations(matchIndices) 136 if len(matchSubgroupsLocations) > 0 { 137 submatchLocations = append(submatchLocations, matchSubgroupsLocations...) 138 } 139 } 140 return submatchLocations 141 } 142 143 func (s *Scanner) AllowLocation(r Rule, content []byte, loc Location) bool { 144 match := string(content[loc.Start:loc.End]) 145 return s.Allow(match) || r.Allow(match) 146 } 147 148 func (r *Rule) getMatchSubgroupsLocations(matchLocs []int) []Location { 149 var locations []Location 150 for i, name := range r.Regex.SubexpNames() { 151 if name == r.SecretGroupName { 152 startLocIndex := 2 * i 153 endLocIndex := startLocIndex + 1 154 locations = append(locations, Location{Start: matchLocs[startLocIndex], End: matchLocs[endLocIndex]}) 155 } 156 } 157 return locations 158 } 159 160 func (r *Rule) MatchPath(path string) bool { 161 return r.Path == nil || r.Path.MatchString(path) 162 } 163 164 func (r *Rule) MatchKeywords(content []byte) bool { 165 if len(r.Keywords) == 0 { 166 return true 167 } 168 169 for _, kw := range r.Keywords { 170 if bytes.Contains(bytes.ToLower(content), []byte(strings.ToLower(kw))) { 171 return true 172 } 173 } 174 175 return false 176 } 177 178 func (r *Rule) AllowPath(path string) bool { 179 return r.AllowRules.AllowPath(path) 180 } 181 182 func (r *Rule) Allow(match string) bool { 183 return r.AllowRules.Allow(match) 184 } 185 186 type AllowRule struct { 187 ID string `yaml:"id"` 188 Description string `yaml:"description"` 189 Regex *Regexp `yaml:"regex"` 190 Path *Regexp `yaml:"path"` 191 } 192 193 type AllowRules []AllowRule 194 195 func (rules AllowRules) AllowPath(path string) bool { 196 for _, rule := range rules { 197 if rule.Path != nil && rule.Path.MatchString(path) { 198 return true 199 } 200 } 201 return false 202 } 203 204 func (rules AllowRules) Allow(match string) bool { 205 for _, rule := range rules { 206 if rule.Regex != nil && rule.Regex.MatchString(match) { 207 return true 208 } 209 } 210 return false 211 } 212 213 type ExcludeBlock struct { 214 Description string `yaml:"description"` 215 Regexes []*Regexp `yaml:"regexes"` 216 } 217 218 type Location struct { 219 Start int 220 End int 221 } 222 223 func (l Location) Match(loc Location) bool { 224 return l.Start <= loc.Start && loc.End <= l.End 225 } 226 227 type Blocks struct { 228 content []byte 229 regexes []*Regexp 230 locs []Location 231 once *sync.Once 232 } 233 234 func newBlocks(content []byte, regexes []*Regexp) Blocks { 235 return Blocks{ 236 content: content, 237 regexes: regexes, 238 once: new(sync.Once), 239 } 240 } 241 242 func (b *Blocks) Match(block Location) bool { 243 b.once.Do(b.find) 244 for _, loc := range b.locs { 245 if loc.Match(block) { 246 return true 247 } 248 } 249 return false 250 } 251 252 func (b *Blocks) find() { 253 for _, regex := range b.regexes { 254 results := regex.FindAllIndex(b.content, -1) 255 if len(results) == 0 { 256 continue 257 } 258 for _, r := range results { 259 b.locs = append(b.locs, Location{ 260 Start: r[0], 261 End: r[1], 262 }) 263 } 264 } 265 } 266 267 func ParseConfig(configPath string) (*Config, error) { 268 // If no config is passed, use built-in rules and allow rules. 269 if configPath == "" { 270 return nil, nil 271 } 272 273 f, err := os.Open(configPath) 274 if errors.Is(err, os.ErrNotExist) { 275 // If the specified file doesn't exist, it just uses built-in rules and allow rules. 276 log.Logger.Debugf("No secret config detected: %s", configPath) 277 return nil, nil 278 } else if err != nil { 279 return nil, xerrors.Errorf("file open error %s: %w", configPath, err) 280 } 281 defer f.Close() 282 283 log.Logger.Infof("Loading %s for secret scanning...", configPath) 284 285 var config Config 286 if err = yaml.NewDecoder(f).Decode(&config); err != nil { 287 return nil, xerrors.Errorf("secrets config decode error: %w", err) 288 } 289 290 return &config, nil 291 } 292 293 func NewScanner(config *Config) Scanner { 294 // Use the default rules 295 if config == nil { 296 return Scanner{Global: &Global{ 297 Rules: builtinRules, 298 AllowRules: builtinAllowRules, 299 }} 300 } 301 302 enabledRules := builtinRules 303 if len(config.EnableBuiltinRuleIDs) != 0 { 304 // Enable only specified built-in rules 305 enabledRules = lo.Filter(builtinRules, func(v Rule, _ int) bool { 306 return slices.Contains(config.EnableBuiltinRuleIDs, v.ID) 307 }) 308 } 309 310 // Custom rules are enabled regardless of "enable-builtin-rules". 311 enabledRules = append(enabledRules, config.CustomRules...) 312 313 // Disable specified rules 314 rules := lo.Filter(enabledRules, func(v Rule, _ int) bool { 315 return !slices.Contains(config.DisableRuleIDs, v.ID) 316 }) 317 318 // Disable specified allow rules 319 allowRules := append(builtinAllowRules, config.CustomAllowRules...) 320 allowRules = lo.Filter(allowRules, func(v AllowRule, _ int) bool { 321 return !slices.Contains(config.DisableAllowRuleIDs, v.ID) 322 }) 323 324 return Scanner{Global: &Global{ 325 Rules: rules, 326 AllowRules: allowRules, 327 ExcludeBlock: config.ExcludeBlock, 328 }} 329 } 330 331 type ScanArgs struct { 332 FilePath string 333 Content []byte 334 } 335 336 type Match struct { 337 Rule Rule 338 Location Location 339 } 340 341 func (s *Scanner) Scan(args ScanArgs) types.Secret { 342 // Global allowed paths 343 if s.AllowPath(args.FilePath) { 344 log.Logger.Debugf("Skipped secret scanning on %q matching allowed paths", args.FilePath) 345 return types.Secret{ 346 FilePath: args.FilePath, 347 } 348 } 349 350 var censored []byte 351 var copyCensored sync.Once 352 var matched []Match 353 354 var findings []types.SecretFinding 355 globalExcludedBlocks := newBlocks(args.Content, s.ExcludeBlock.Regexes) 356 for _, rule := range s.Rules { 357 // Check if the file path should be scanned by this rule 358 if !rule.MatchPath(args.FilePath) { 359 log.Logger.Debugf("Skipped secret scanning on %q as non-compliant to the rule %q", args.FilePath, rule.ID) 360 continue 361 } 362 363 // Check if the file path should be allowed 364 if rule.AllowPath(args.FilePath) { 365 log.Logger.Debugf("Skipped secret scanning on %q as allowed", args.FilePath) 366 continue 367 } 368 369 // Check if the file content contains keywords and should be scanned 370 if !rule.MatchKeywords(args.Content) { 371 continue 372 } 373 374 // Detect secrets 375 locs := s.FindLocations(rule, args.Content) 376 if len(locs) == 0 { 377 continue 378 } 379 380 localExcludedBlocks := newBlocks(args.Content, rule.ExcludeBlock.Regexes) 381 382 for _, loc := range locs { 383 // Skip the secret if it is within excluded blocks. 384 if globalExcludedBlocks.Match(loc) || localExcludedBlocks.Match(loc) { 385 continue 386 } 387 388 matched = append(matched, Match{ 389 Rule: rule, 390 Location: loc, 391 }) 392 copyCensored.Do(func() { 393 censored = make([]byte, len(args.Content)) 394 copy(censored, args.Content) 395 }) 396 censored = censorLocation(loc, censored) 397 } 398 } 399 400 for _, match := range matched { 401 findings = append(findings, toFinding(match.Rule, match.Location, censored)) 402 } 403 404 if len(findings) == 0 { 405 return types.Secret{} 406 } 407 408 sort.Slice(findings, func(i, j int) bool { 409 if findings[i].RuleID != findings[j].RuleID { 410 return findings[i].RuleID < findings[j].RuleID 411 } 412 return findings[i].Match < findings[j].Match 413 }) 414 415 return types.Secret{ 416 FilePath: args.FilePath, 417 Findings: findings, 418 } 419 } 420 421 func censorLocation(loc Location, input []byte) []byte { 422 return append( 423 input[:loc.Start], 424 append( 425 bytes.Repeat([]byte("*"), loc.End-loc.Start), 426 input[loc.End:]..., 427 )..., 428 ) 429 } 430 431 func toFinding(rule Rule, loc Location, content []byte) types.SecretFinding { 432 startLine, endLine, code, matchLine := findLocation(loc.Start, loc.End, content) 433 434 return types.SecretFinding{ 435 RuleID: rule.ID, 436 Category: rule.Category, 437 Severity: lo.Ternary(rule.Severity == "", "UNKNOWN", rule.Severity), 438 Title: rule.Title, 439 Match: matchLine, 440 StartLine: startLine, 441 EndLine: endLine, 442 Code: code, 443 } 444 } 445 446 const secretHighlightRadius = 2 // number of lines above + below each secret to include in code output 447 448 func findLocation(start, end int, content []byte) (int, int, types.Code, string) { 449 startLineNum := bytes.Count(content[:start], lineSep) 450 451 lineStart := bytes.LastIndex(content[:start], lineSep) 452 if lineStart == -1 { 453 lineStart = 0 454 } else { 455 lineStart += 1 456 } 457 458 lineEnd := bytes.Index(content[start:], lineSep) 459 if lineEnd == -1 { 460 lineEnd = len(content) 461 } else { 462 lineEnd += start 463 } 464 465 if lineEnd-lineStart > 100 { 466 lineStart = lo.Ternary(start-30 < 0, 0, start-30) 467 lineEnd = lo.Ternary(end+20 > len(content), len(content), end+20) 468 } 469 matchLine := string(content[lineStart:lineEnd]) 470 endLineNum := startLineNum + bytes.Count(content[start:end], lineSep) 471 472 var code types.Code 473 474 lines := bytes.Split(content, lineSep) 475 codeStart := lo.Ternary(startLineNum-secretHighlightRadius < 0, 0, startLineNum-secretHighlightRadius) 476 codeEnd := lo.Ternary(endLineNum+secretHighlightRadius > len(lines), len(lines), endLineNum+secretHighlightRadius) 477 478 rawLines := lines[codeStart:codeEnd] 479 var foundFirst bool 480 for i, rawLine := range rawLines { 481 strRawLine := string(rawLine) 482 realLine := codeStart + i 483 inCause := realLine >= startLineNum && realLine <= endLineNum 484 code.Lines = append(code.Lines, types.Line{ 485 Number: codeStart + i + 1, 486 Content: strRawLine, 487 IsCause: inCause, 488 Highlighted: strRawLine, 489 FirstCause: !foundFirst && inCause, 490 LastCause: false, 491 }) 492 foundFirst = foundFirst || inCause 493 } 494 if len(code.Lines) > 0 { 495 for i := len(code.Lines) - 1; i >= 0; i-- { 496 if code.Lines[i].IsCause { 497 code.Lines[i].LastCause = true 498 break 499 } 500 } 501 } 502 503 return startLineNum + 1, endLineNum + 1, code, matchLine 504 }