github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/file/cataloger/secrets/secrets_search_by_line_strategy.go (about)

     1  package secrets
     2  
     3  import (
     4  	"bufio"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"regexp"
     9  
    10  	"github.com/anchore/syft/internal"
    11  	"github.com/anchore/syft/syft/file"
    12  )
    13  
    14  func catalogLocationByLine(resolver file.Resolver, location file.Location, patterns map[string]*regexp.Regexp) ([]file.SearchResult, error) {
    15  	readCloser, err := resolver.FileContentsByLocation(location)
    16  	if err != nil {
    17  		return nil, fmt.Errorf("unable to fetch reader for location=%q : %w", location, err)
    18  	}
    19  	defer internal.CloseAndLogError(readCloser, location.VirtualPath)
    20  
    21  	var scanner = bufio.NewReader(readCloser)
    22  	var position int64
    23  	var allSecrets []file.SearchResult
    24  	var lineNo int64
    25  	var readErr error
    26  	for !errors.Is(readErr, io.EOF) {
    27  		lineNo++
    28  		var line []byte
    29  		// TODO: we're at risk of large memory usage for very long lines
    30  		line, readErr = scanner.ReadBytes('\n')
    31  		if readErr != nil && readErr != io.EOF {
    32  			return nil, readErr
    33  		}
    34  
    35  		lineSecrets, err := searchForSecretsWithinLine(resolver, location, patterns, line, lineNo, position)
    36  		if err != nil {
    37  			return nil, err
    38  		}
    39  		position += int64(len(line))
    40  		allSecrets = append(allSecrets, lineSecrets...)
    41  	}
    42  
    43  	return allSecrets, nil
    44  }
    45  
    46  func searchForSecretsWithinLine(resolver file.Resolver, location file.Location, patterns map[string]*regexp.Regexp, line []byte, lineNo int64, position int64) ([]file.SearchResult, error) {
    47  	var secrets []file.SearchResult
    48  	for name, pattern := range patterns {
    49  		matches := pattern.FindAllIndex(line, -1)
    50  		for i, match := range matches {
    51  			if i%2 == 1 {
    52  				// FindAllIndex returns pairs of numbers for each match, we are only interested in the starting (first)
    53  				// position in each pair.
    54  				continue
    55  			}
    56  
    57  			lineOffset := int64(match[0])
    58  			seekLocation := position + lineOffset
    59  			reader, err := readerAtPosition(resolver, location, seekLocation)
    60  			if err != nil {
    61  				return nil, err
    62  			}
    63  
    64  			secret := extractSecretFromPosition(reader, name, pattern, lineNo, lineOffset, seekLocation)
    65  			if secret != nil {
    66  				secrets = append(secrets, *secret)
    67  			}
    68  			internal.CloseAndLogError(reader, location.VirtualPath)
    69  		}
    70  	}
    71  
    72  	return secrets, nil
    73  }
    74  
    75  func readerAtPosition(resolver file.Resolver, location file.Location, seekPosition int64) (io.ReadCloser, error) {
    76  	readCloser, err := resolver.FileContentsByLocation(location)
    77  	if err != nil {
    78  		return nil, fmt.Errorf("unable to fetch reader for location=%q : %w", location, err)
    79  	}
    80  	if seekPosition > 0 {
    81  		n, err := io.CopyN(io.Discard, readCloser, seekPosition)
    82  		if err != nil {
    83  			return nil, fmt.Errorf("unable to read contents for location=%q while searching for secrets: %w", location, err)
    84  		}
    85  		if n != seekPosition {
    86  			return nil, fmt.Errorf("unexpected seek location for location=%q while searching for secrets: %d != %d", location, n, seekPosition)
    87  		}
    88  	}
    89  	return readCloser, nil
    90  }
    91  
    92  func extractSecretFromPosition(readCloser io.ReadCloser, name string, pattern *regexp.Regexp, lineNo, lineOffset, seekPosition int64) *file.SearchResult {
    93  	reader := &newlineCounter{RuneReader: bufio.NewReader(readCloser)}
    94  	positions := pattern.FindReaderSubmatchIndex(reader)
    95  	if len(positions) == 0 {
    96  		// no matches found
    97  		return nil
    98  	}
    99  
   100  	index := pattern.SubexpIndex("value")
   101  	var indexOffset int
   102  	if index != -1 {
   103  		// there is a capture group, use the capture group selection as the secret value. To do this we want to
   104  		// use the position at the discovered offset. Note: all positions come in pairs, so you will need to adjust
   105  		// the offset accordingly (multiply by 2).
   106  		indexOffset = index * 2
   107  	}
   108  	// get the start and stop of the secret value. Note: this covers both when there is a capture group
   109  	// and when there is not a capture group (full value match)
   110  	start, stop := int64(positions[indexOffset]), int64(positions[indexOffset+1])
   111  
   112  	if start < 0 || stop < 0 {
   113  		// no match location found. This can happen when there is a value capture group specified by the user
   114  		// and there was a match on the overall regex, but not for the capture group (which is possible if the capture
   115  		// group is optional).
   116  		return nil
   117  	}
   118  
   119  	// lineNoOfSecret are the number of lines which occur before the start of the secret value
   120  	var lineNoOfSecret = lineNo + int64(reader.newlinesBefore(start))
   121  	// lineOffsetOfSecret are the number of bytes that occur after the last newline but before the secret value.
   122  	var lineOffsetOfSecret = start - reader.newlinePositionBefore(start)
   123  	if lineNoOfSecret == lineNo {
   124  		// the secret value starts in the same line as the overall match, so we must consider that line offset
   125  		lineOffsetOfSecret += lineOffset
   126  	}
   127  
   128  	return &file.SearchResult{
   129  		Classification: name,
   130  		SeekPosition:   start + seekPosition,
   131  		Length:         stop - start,
   132  		LineNumber:     lineNoOfSecret,
   133  		LineOffset:     lineOffsetOfSecret,
   134  	}
   135  }