github.com/google/osv-scalibr@v0.4.1/veles/secrets/recaptchakey/detector.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package recaptchakey
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"regexp"
    21  	"slices"
    22  
    23  	"github.com/google/osv-scalibr/veles"
    24  )
    25  
    26  var (
    27  	// inlinePattern matches an inline assignment of a captcha secret key and captures its value (works for .env .json and .yaml)
    28  	//
    29  	// { and } are excluded to not overlap with inline json
    30  	inlinePattern = regexp.MustCompile(`(?i)captcha[^\{\}\n\r]{0,50}(?:secret|private)[^\{\}\n\r]{0,50}(6[A-Za-z0-9_\-'"]{39})\b`)
    31  	// jsonBlockPattern matches a json object with the key ending in captcha and then extract the value of a secret key
    32  	jsonBlockPattern = regexp.MustCompile(`(?i)captcha[._-a-zA-Z0-9]*\\*"\s?:\s?\{[^\}]*?(?:private|secret)[a-zA-Z_]*\\*['"]?\s?:\s?\\*['"]?(6[A-Za-z0-9_-]{39})\b`)
    33  	// yamlBlockPattern roughly searches for a yaml block with a secret key near it (leaving space before to check for indentation)
    34  	yamlBlockPattern = regexp.MustCompile(`(?i)\s*([a-zA-Z_]*captcha[._-a-zA-Z0-9]*:[\r\n]+)[\s\S]{0,300}(?:private|secret)[a-zA-Z_]*\s*:\s*['"]?(6[A-Za-z0-9_-]{39}\b)`)
    35  )
    36  
    37  const (
    38  	maxSecretLen  = 40
    39  	maxContextLen = 500
    40  	maxLen        = maxSecretLen + maxContextLen
    41  )
    42  
    43  type detector struct{}
    44  
    45  // NewDetector returns a reCAPTCHA secret keys detector
    46  func NewDetector() veles.Detector { return &detector{} }
    47  
    48  // Detect matches reCAPTCHA keys in config files,
    49  func (d *detector) Detect(data []byte) ([]veles.Secret, []int) {
    50  	matches := slices.Concat(
    51  		inlinePattern.FindAllSubmatchIndex(data, -1),
    52  		jsonBlockPattern.FindAllSubmatchIndex(data, -1),
    53  		findInsideYamlBlock(data),
    54  	)
    55  
    56  	var secrets []veles.Secret
    57  	var positions []int
    58  
    59  	// Process regex-based matches
    60  	for _, m := range matches {
    61  		start := m[len(m)-2]
    62  		end := m[len(m)-1]
    63  		if start == -1 || end == -1 {
    64  			continue
    65  		}
    66  		secrets = append(secrets, Key{Secret: string(data[start:end])})
    67  		positions = append(positions, start)
    68  	}
    69  	return secrets, positions
    70  }
    71  
    72  // MaxSecretLen returns the length a secret can have
    73  func (d *detector) MaxSecretLen() uint32 { return maxLen }
    74  
    75  // findInsideYamlBlock searches for inlineYamlPattern inside `captcha:` yaml blocks
    76  func findInsideYamlBlock(data []byte) [][]int {
    77  	matches := yamlBlockPattern.FindAllSubmatchIndex(data, -1)
    78  	matches = slices.DeleteFunc(matches, func(m []int) bool {
    79  		blockKeyIndent := m[2] - m[0] // distance between the full match and the (captcha) capture group
    80  		blockStart := m[3]            // end of key group, hence start of the block
    81  		end := m[1]                   // end of full match
    82  
    83  		r := bufio.NewScanner(bytes.NewReader(data[blockStart:end]))
    84  		for r.Scan() {
    85  			line := r.Bytes()
    86  			trimmed := bytes.TrimSpace(line)
    87  			// skip empty lines and comments
    88  			if len(trimmed) == 0 || trimmed[0] == '#' {
    89  				continue
    90  			}
    91  			// if the indent is less then the block's the key is in another block
    92  			if countIndent(line) <= blockKeyIndent {
    93  				return true
    94  			}
    95  		}
    96  		return false
    97  	})
    98  	return matches
    99  }
   100  
   101  // countIndent calculates the number of leading spaces or tabs in a byte slice.
   102  func countIndent(s []byte) int {
   103  	for i, r := range s {
   104  		if r != ' ' && r != '\t' {
   105  			return i
   106  		}
   107  	}
   108  	return len(s)
   109  }