github.com/google/osv-scalibr@v0.4.1/veles/secrets/hcp/detector.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hcp
    16  
    17  import (
    18  	"regexp"
    19  	"slices"
    20  	"strings"
    21  
    22  	"github.com/google/osv-scalibr/veles"
    23  	jwtlib "github.com/google/osv-scalibr/veles/secrets/common/jwt"
    24  	"github.com/google/osv-scalibr/veles/secrets/common/pair"
    25  )
    26  
    27  const (
    28  	// maxSecretLen is a broad upper bound of the maximum length that hcp_client_id and hcp_client_secret can have
    29  	maxSecretLen = 80
    30  	// maxPairWindowLen is the maximum window length to pair env-style credentials.
    31  	maxPairWindowLen = 10 * 1 << 10 // 10 KiB
    32  	// maxAccessTokenLen is the maximum length of a JWT token (delegated to common jwt limits).
    33  	maxAccessTokenLen = jwtlib.MaxTokenLength
    34  )
    35  
var (
	// reClientID matches an HCP client ID assignment as found in env files,
	// config files or string literals. The key must be exactly HCP_CLIENT_ID or
	// hcp_client_id as a whole word (optionally quoted), followed by a ':' or
	// '=' separator with optional surrounding whitespace. Capture group 1 is
	// the value: exactly 32 ASCII letters/digits, optionally quoted.
	// The pattern has no boundary after the value, so a longer alphanumeric
	// run still matches on its first 32 characters.
	reClientID = regexp.MustCompile(`["']?\b(?:HCP_CLIENT_ID|hcp_client_id)\b["']?\s*[:=]\s*["']?([A-Za-z0-9]{32})["']?`)
	// reClientSec matches an HCP client secret assignment in the same shapes as
	// reClientID, keyed by HCP_CLIENT_SECRET or hcp_client_secret. Capture
	// group 1 is the value: exactly 64 characters drawn from ASCII
	// letters/digits and '.', '_', '~', '-', optionally quoted.
	// As with reClientID, nothing anchors the end of the value.
	reClientSec = regexp.MustCompile(`["']?\b(?:HCP_CLIENT_SECRET|hcp_client_secret)\b["']?\s*[:=]\s*["']?([A-Za-z0-9._~\-]{64})["']?`)
)
    42  
    43  // NewPairDetector returns a Detector that finds HCP client credentials from key/value pairs.
    44  func NewPairDetector() veles.Detector {
    45  	return &pair.Detector{
    46  		MaxElementLen: maxSecretLen, MaxDistance: maxPairWindowLen,
    47  		FindA: findMatches(reClientID), FindB: findMatches(reClientSec),
    48  		FromPair: func(p pair.Pair) (veles.Secret, bool) {
    49  			return ClientCredentials{ClientID: string(p.A.Value), ClientSecret: string(p.B.Value)}, true
    50  		},
    51  		FromPartialPair: func(p pair.Pair) (veles.Secret, bool) {
    52  			if p.A == nil {
    53  				return ClientCredentials{ClientSecret: string(p.B.Value)}, true
    54  			}
    55  			return ClientCredentials{ClientID: string(p.A.Value)}, true
    56  		},
    57  	}
    58  }
    59  
    60  // findMatches returns the start offsets and captured group values for all matches of re in data.
    61  func findMatches(re *regexp.Regexp) func(data []byte) []*pair.Match {
    62  	return func(data []byte) []*pair.Match {
    63  		idxs := re.FindAllSubmatchIndex(data, -1)
    64  		if len(idxs) == 0 {
    65  			return nil
    66  		}
    67  		out := make([]*pair.Match, 0, len(idxs))
    68  		for _, m := range idxs {
    69  			// m[0], m[1] are the full-match bounds; m[2], m[3] are the first capture group bounds
    70  			out = append(out, &pair.Match{Start: m[2], Value: data[m[2]:m[3]]})
    71  		}
    72  		return out
    73  	}
    74  }
    75  
// AccessTokenDetector finds HCP access tokens. It extracts JWTs from the input
// and keeps those whose header and payload claims (issuer, audience and grant
// type) match the values expected for HashiCorp Cloud Platform.
// The type is stateless; its zero value is ready to use.
type AccessTokenDetector struct{}

// Compile-time check that AccessTokenDetector implements veles.Detector.
var _ veles.Detector = AccessTokenDetector{}
    81  
    82  // NewAccessTokenDetector returns a Detector that finds HCP access tokens from JWTs.
    83  func NewAccessTokenDetector() veles.Detector { return AccessTokenDetector{} }
    84  
    85  // MaxSecretLen implements veles.Detector and returns the maximum size of an
    86  // access token that the detector accounts for.
    87  func (AccessTokenDetector) MaxSecretLen() uint32 { return maxAccessTokenLen }
    88  
    89  // Detect implements veles.Detector and returns AccessToken secrets for JWTs
    90  // whose payload looks like HCP.
    91  func (AccessTokenDetector) Detect(data []byte) ([]veles.Secret, []int) {
    92  	var secrets []veles.Secret
    93  	var positions []int
    94  	tokens, poss := jwtlib.ExtractTokens(data)
    95  	for i, t := range tokens {
    96  		if isHCPAccessToken(t) {
    97  			secrets = append(secrets, AccessToken{Token: t.Raw()})
    98  			positions = append(positions, poss[i])
    99  		}
   100  	}
   101  	return secrets, positions
   102  }
   103  
   104  // isHCPAccessToken decodes the JWT header and payload and checks issuer/audience hints
   105  // consistent with HashiCorp Cloud Platform.
   106  func isHCPAccessToken(t jwtlib.Token) bool {
   107  	hdr := t.Header()
   108  	if typ, ok := hdr["typ"].(string); ok && !strings.EqualFold(typ, "JWT") {
   109  		return false
   110  	}
   111  	p := t.Payload()
   112  	iss, _ := p["iss"].(string)
   113  	if iss != "https://auth.idp.hashicorp.com/" {
   114  		return false
   115  	}
   116  	if aud, ok := p["aud"]; !ok || !slices.Contains(normalizeAud(aud), "https://api.hashicorp.cloud") {
   117  		return false
   118  	}
   119  	if gty, ok := p["gty"].(string); !ok || gty != "client-credentials" {
   120  		return false
   121  	}
   122  	return true
   123  }
   124  
   125  func normalizeAud(a any) []string {
   126  	switch v := a.(type) {
   127  	case string:
   128  		return []string{v}
   129  	case []any:
   130  		out := make([]string, 0, len(v))
   131  		for _, x := range v {
   132  			if s, ok := x.(string); ok {
   133  				out = append(out, s)
   134  			}
   135  		}
   136  		return out
   137  	default:
   138  		return nil
   139  	}
   140  }