github.com/google/osv-scalibr@v0.4.1/veles/secrets/hashicorpvault/detector.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hashicorpvault 16 17 import ( 18 "regexp" 19 "strings" 20 21 "github.com/google/osv-scalibr/veles" 22 "github.com/google/osv-scalibr/veles/secrets/common/simpletoken" 23 ) 24 25 // maxTokenLength is the maximum size of a Vault token. 26 const maxTokenLength = 200 27 28 // maxUUIDLength is the maximum size of a UUID (AppRole credential). 29 const maxUUIDLength = 36 30 31 // vaultTokenRe is a regular expression that matches HashiCorp Vault tokens. 32 // Vault tokens can start with older prefixes (s., b., r.) or newer prefixes (hvs., hvb.) followed by base64-like characters. 33 var vaultTokenRe = regexp.MustCompile(`(?:hv[sb]|[sbr])\.[A-Za-z0-9_-]{24,}`) 34 35 // appRoleCredentialRe is a regular expression that matches UUID v4 format used for AppRole credentials. 36 // UUIDs have the format: 8-4-4-4-12 hexadecimal digits separated by hyphens. 37 var appRoleCredentialRe = regexp.MustCompile(`[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`) 38 39 // appRoleContextRe matches potential AppRole credential pairs with context labels. 40 // This matches patterns like "role_id: uuid", "ROLE_ID=uuid", "secret_id: uuid" etc. 41 var appRoleContextRe = regexp.MustCompile(`(?i)(role_id|secret_id)\s*[:\s=]\s*([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})`) 42 43 // appRoleDetector implements context-aware AppRole credential detection. 44 type appRoleDetector struct { 45 maxUUIDLen uint32 46 maxContextLen uint32 // Maximum distance to look for context 47 } 48 49 // NewTokenDetector returns a new simpletoken.Detector that matches HashiCorp Vault tokens. 50 func NewTokenDetector() veles.Detector { 51 return simpletoken.Detector{ 52 MaxLen: maxTokenLength, 53 Re: vaultTokenRe, 54 FromMatch: func(b []byte) (veles.Secret, bool) { 55 return Token{Token: string(b)}, true 56 }, 57 } 58 } 59 60 // NewAppRoleDetector returns a context-aware detector that matches UUID-formatted AppRole credentials. 61 // This detector analyzes surrounding context to identify role_id and secret_id pairs when possible, 62 // falling back to individual UUID detection when context is unclear. 63 func NewAppRoleDetector() veles.Detector { 64 return &appRoleDetector{ 65 maxUUIDLen: maxUUIDLength, 66 maxContextLen: 500, // Look up to 500 bytes around each UUID for context 67 } 68 } 69 70 // MaxSecretLen returns the maximum length of secrets this detector can find. 71 func (d *appRoleDetector) MaxSecretLen() uint32 { 72 return d.maxContextLen 73 } 74 75 // Detect implements context-aware AppRole credential detection. 76 func (d *appRoleDetector) Detect(data []byte) ([]veles.Secret, []int) { 77 var secrets []veles.Secret 78 var positions []int 79 80 // First, try to find context-aware credential pairs 81 contextMatches := appRoleContextRe.FindAllSubmatchIndex(data, -1) 82 processedUUIDs := make(map[string]bool) 83 84 // Group matches by proximity to find potential pairs 85 credentialPairs := groupCredentialsByProximity(data, contextMatches, processedUUIDs) 86 87 for _, pair := range credentialPairs { 88 secrets = append(secrets, pair.credentials) 89 positions = append(positions, pair.position) 90 } 91 92 // Then find standalone UUIDs that weren't part of context matches 93 uuidMatches := appRoleCredentialRe.FindAllSubmatchIndex(data, -1) 94 for _, match := range uuidMatches { 95 start, end := match[0], match[1] 96 uuid := string(data[start:end]) 97 98 if !processedUUIDs[uuid] { 99 secrets = append(secrets, AppRoleCredentials{ID: uuid}) 100 positions = append(positions, start) 101 } 102 } 103 104 return secrets, positions 105 } 106 107 // credentialPair represents a detected AppRole credential pair with its position. 108 type credentialPair struct { 109 credentials AppRoleCredentials 110 position int 111 } 112 113 // groupCredentialsByProximity analyzes context matches to group role_id/secret_id pairs. 114 func groupCredentialsByProximity(data []byte, matches [][]int, processedUUIDs map[string]bool) []credentialPair { 115 var pairs []credentialPair 116 117 // Convert matches to a more workable format 118 type contextMatch struct { 119 fieldType string // "role_id" or "secret_id" 120 uuid string 121 position int 122 } 123 124 var contextMatches []contextMatch 125 for _, match := range matches { 126 if len(match) >= 6 { // Now we have 3 capture groups: full match, field type, UUID 127 fieldType := strings.ToLower(string(data[match[2]:match[3]])) 128 uuid := string(data[match[4]:match[5]]) 129 130 contextMatches = append(contextMatches, contextMatch{ 131 fieldType: fieldType, 132 uuid: uuid, 133 position: match[0], 134 }) 135 processedUUIDs[uuid] = true 136 } 137 } 138 139 // Group nearby matches into credential pairs 140 for i, match1 := range contextMatches { 141 if match1.fieldType == "role_id" { 142 // Look for a nearby secret_id 143 for j, match2 := range contextMatches { 144 if i != j && match2.fieldType == "secret_id" { 145 // Check if they're within reasonable proximity (e.g., within 200 bytes) 146 distance := abs(match1.position - match2.position) 147 if distance < 200 { 148 pairs = append(pairs, credentialPair{ 149 credentials: AppRoleCredentials{ 150 RoleID: match1.uuid, 151 SecretID: match2.uuid, 152 }, 153 position: minInt(match1.position, match2.position), 154 }) 155 break 156 } 157 } 158 } 159 } 160 } 161 162 // Add standalone context matches that didn't form pairs 163 usedInPairs := make(map[string]bool) 164 for _, pair := range pairs { 165 usedInPairs[pair.credentials.RoleID] = true 166 usedInPairs[pair.credentials.SecretID] = true 167 } 168 169 for _, match := range contextMatches { 170 if !usedInPairs[match.uuid] { 171 var creds AppRoleCredentials 172 if match.fieldType == "role_id" { 173 creds.RoleID = match.uuid 174 } else { 175 creds.SecretID = match.uuid 176 } 177 pairs = append(pairs, credentialPair{ 178 credentials: creds, 179 position: match.position, 180 }) 181 } 182 } 183 184 return pairs 185 } 186 187 // Helper functions 188 func abs(x int) int { 189 if x < 0 { 190 return -x 191 } 192 return x 193 } 194 195 func minInt(a, b int) int { 196 if a < b { 197 return a 198 } 199 return b 200 }