github.com/google/osv-scalibr@v0.4.1/veles/secrets/hashicorpvault/detector.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hashicorpvault
    16  
    17  import (
    18  	"regexp"
    19  	"strings"
    20  
    21  	"github.com/google/osv-scalibr/veles"
    22  	"github.com/google/osv-scalibr/veles/secrets/common/simpletoken"
    23  )
    24  
    25  // maxTokenLength is the maximum size of a Vault token.
    26  const maxTokenLength = 200
    27  
    28  // maxUUIDLength is the maximum size of a UUID (AppRole credential).
    29  const maxUUIDLength = 36
    30  
    31  // vaultTokenRe is a regular expression that matches HashiCorp Vault tokens.
    32  // Vault tokens can start with older prefixes (s., b., r.) or newer prefixes (hvs., hvb.) followed by base64-like characters.
    33  var vaultTokenRe = regexp.MustCompile(`(?:hv[sb]|[sbr])\.[A-Za-z0-9_-]{24,}`)
    34  
    35  // appRoleCredentialRe is a regular expression that matches UUID v4 format used for AppRole credentials.
    36  // UUIDs have the format: 8-4-4-4-12 hexadecimal digits separated by hyphens.
    37  var appRoleCredentialRe = regexp.MustCompile(`[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`)
    38  
    39  // appRoleContextRe matches potential AppRole credential pairs with context labels.
    40  // This matches patterns like "role_id: uuid", "ROLE_ID=uuid", "secret_id: uuid" etc.
    41  var appRoleContextRe = regexp.MustCompile(`(?i)(role_id|secret_id)\s*[:\s=]\s*([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})`)
    42  
    43  // appRoleDetector implements context-aware AppRole credential detection.
    44  type appRoleDetector struct {
    45  	maxUUIDLen    uint32
    46  	maxContextLen uint32 // Maximum distance to look for context
    47  }
    48  
    49  // NewTokenDetector returns a new simpletoken.Detector that matches HashiCorp Vault tokens.
    50  func NewTokenDetector() veles.Detector {
    51  	return simpletoken.Detector{
    52  		MaxLen: maxTokenLength,
    53  		Re:     vaultTokenRe,
    54  		FromMatch: func(b []byte) (veles.Secret, bool) {
    55  			return Token{Token: string(b)}, true
    56  		},
    57  	}
    58  }
    59  
    60  // NewAppRoleDetector returns a context-aware detector that matches UUID-formatted AppRole credentials.
    61  // This detector analyzes surrounding context to identify role_id and secret_id pairs when possible,
    62  // falling back to individual UUID detection when context is unclear.
    63  func NewAppRoleDetector() veles.Detector {
    64  	return &appRoleDetector{
    65  		maxUUIDLen:    maxUUIDLength,
    66  		maxContextLen: 500, // Look up to 500 bytes around each UUID for context
    67  	}
    68  }
    69  
    70  // MaxSecretLen returns the maximum length of secrets this detector can find.
    71  func (d *appRoleDetector) MaxSecretLen() uint32 {
    72  	return d.maxContextLen
    73  }
    74  
    75  // Detect implements context-aware AppRole credential detection.
    76  func (d *appRoleDetector) Detect(data []byte) ([]veles.Secret, []int) {
    77  	var secrets []veles.Secret
    78  	var positions []int
    79  
    80  	// First, try to find context-aware credential pairs
    81  	contextMatches := appRoleContextRe.FindAllSubmatchIndex(data, -1)
    82  	processedUUIDs := make(map[string]bool)
    83  
    84  	// Group matches by proximity to find potential pairs
    85  	credentialPairs := groupCredentialsByProximity(data, contextMatches, processedUUIDs)
    86  
    87  	for _, pair := range credentialPairs {
    88  		secrets = append(secrets, pair.credentials)
    89  		positions = append(positions, pair.position)
    90  	}
    91  
    92  	// Then find standalone UUIDs that weren't part of context matches
    93  	uuidMatches := appRoleCredentialRe.FindAllSubmatchIndex(data, -1)
    94  	for _, match := range uuidMatches {
    95  		start, end := match[0], match[1]
    96  		uuid := string(data[start:end])
    97  
    98  		if !processedUUIDs[uuid] {
    99  			secrets = append(secrets, AppRoleCredentials{ID: uuid})
   100  			positions = append(positions, start)
   101  		}
   102  	}
   103  
   104  	return secrets, positions
   105  }
   106  
   107  // credentialPair represents a detected AppRole credential pair with its position.
   108  type credentialPair struct {
   109  	credentials AppRoleCredentials
   110  	position    int
   111  }
   112  
   113  // groupCredentialsByProximity analyzes context matches to group role_id/secret_id pairs.
   114  func groupCredentialsByProximity(data []byte, matches [][]int, processedUUIDs map[string]bool) []credentialPair {
   115  	var pairs []credentialPair
   116  
   117  	// Convert matches to a more workable format
   118  	type contextMatch struct {
   119  		fieldType string // "role_id" or "secret_id"
   120  		uuid      string
   121  		position  int
   122  	}
   123  
   124  	var contextMatches []contextMatch
   125  	for _, match := range matches {
   126  		if len(match) >= 6 { // Now we have 3 capture groups: full match, field type, UUID
   127  			fieldType := strings.ToLower(string(data[match[2]:match[3]]))
   128  			uuid := string(data[match[4]:match[5]])
   129  
   130  			contextMatches = append(contextMatches, contextMatch{
   131  				fieldType: fieldType,
   132  				uuid:      uuid,
   133  				position:  match[0],
   134  			})
   135  			processedUUIDs[uuid] = true
   136  		}
   137  	}
   138  
   139  	// Group nearby matches into credential pairs
   140  	for i, match1 := range contextMatches {
   141  		if match1.fieldType == "role_id" {
   142  			// Look for a nearby secret_id
   143  			for j, match2 := range contextMatches {
   144  				if i != j && match2.fieldType == "secret_id" {
   145  					// Check if they're within reasonable proximity (e.g., within 200 bytes)
   146  					distance := abs(match1.position - match2.position)
   147  					if distance < 200 {
   148  						pairs = append(pairs, credentialPair{
   149  							credentials: AppRoleCredentials{
   150  								RoleID:   match1.uuid,
   151  								SecretID: match2.uuid,
   152  							},
   153  							position: minInt(match1.position, match2.position),
   154  						})
   155  						break
   156  					}
   157  				}
   158  			}
   159  		}
   160  	}
   161  
   162  	// Add standalone context matches that didn't form pairs
   163  	usedInPairs := make(map[string]bool)
   164  	for _, pair := range pairs {
   165  		usedInPairs[pair.credentials.RoleID] = true
   166  		usedInPairs[pair.credentials.SecretID] = true
   167  	}
   168  
   169  	for _, match := range contextMatches {
   170  		if !usedInPairs[match.uuid] {
   171  			var creds AppRoleCredentials
   172  			if match.fieldType == "role_id" {
   173  				creds.RoleID = match.uuid
   174  			} else {
   175  				creds.SecretID = match.uuid
   176  			}
   177  			pairs = append(pairs, credentialPair{
   178  				credentials: creds,
   179  				position:    match.position,
   180  			})
   181  		}
   182  	}
   183  
   184  	return pairs
   185  }
   186  
   187  // Helper functions
   188  func abs(x int) int {
   189  	if x < 0 {
   190  		return -x
   191  	}
   192  	return x
   193  }
   194  
   195  func minInt(a, b int) int {
   196  	if a < b {
   197  		return a
   198  	}
   199  	return b
   200  }