github.com/google/osv-scalibr@v0.4.1/veles/secrets/gcpsak/detector.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gcpsak
    16  
    17  import (
    18  	"encoding/base64"
    19  	"regexp"
    20  
    21  	"github.com/google/osv-scalibr/veles"
    22  	"github.com/google/osv-scalibr/veles/secrets/common/flatjson"
    23  )
    24  
    25  var (
    26  	// Matches on "auth_provider_x509_cert_url" somewhere between curly braces.
    27  	// This matches very broadly and gives us some vague "JSON context" that we
    28  	// can further refine on.
    29  	reJSON = regexp.MustCompile(`\{[^{]+auth_provider_x509_cert_url[^}]+\}`)
    30  
    31  	// Matches on a base64-encoded GCP service account key (in pretty JSON
    32  	// format). This relies on consistent field order and whitespace.
    33  	// That should be a reasonable heuristic. Note that this means we will miss
    34  	// (false negatives) base64-encoded GCP SAK that have been modified before
    35  	// encoding.
    36  	reBase64 = regexp.MustCompile(`ewogICJ0eXBlIjogInNlcnZpY2VfYWNjb3VudCIs[a-zA-Z0-9/+]{52,}`)
    37  
    38  	// requiredKeys are the keys that must be present in a candidate's JSON
    39  	// representation for the Detector to produce a result.
    40  	requiredKeys = []string{"private_key_id", "private_key", "client_email"}
    41  
    42  	// optionalKeys are not required to be present but are reported as part of a
    43  	// GCPSAK if they are.
    44  	optionalKeys = []string{
    45  		"type",
    46  		"project_id",
    47  		"client_id",
    48  		"auth_uri",
    49  		"token_uri",
    50  		"auth_provider_x509_cert_url",
    51  		"client_x509_cert_url",
    52  		"universe_domain",
    53  	}
    54  )
    55  
    56  var _ veles.Detector = NewDetector()
    57  
    58  // Detector is a Veles Detector that finds GCP service account keys.
    59  //
    60  // It can find GCP SAK in plain JSON format (even if it's modified or escaped)
    61  // and base64 encoded but only if it wasn't modified before encoding.
    62  type Detector struct {
    63  	ex             *flatjson.Extractor
    64  	withExtra      bool
    65  	withPrivateKey bool
    66  }
    67  
    68  // NewDetector returns a new Veles Detector that finds GCP service account keys.
    69  func NewDetector() *Detector {
    70  	return &Detector{
    71  		ex:        flatjson.NewExtractor(requiredKeys, nil),
    72  		withExtra: false,
    73  	}
    74  }
    75  
    76  // NewDetectorWithExtraFields returns a new Veles Detector that finds GCP
    77  // service account keys and returns them with all their fields not just those
    78  // needed for validation.
    79  //
    80  // If includePrivateKey is set, the result will also contain the raw private
    81  // key. This should generally be avoided because it creates the risk of
    82  // accidentally leaking the key.
    83  func NewDetectorWithExtraFields(includePrivateKey bool) *Detector {
    84  	return &Detector{
    85  		ex:             flatjson.NewExtractor(requiredKeys, optionalKeys),
    86  		withExtra:      true,
    87  		withPrivateKey: includePrivateKey,
    88  	}
    89  }
    90  
    91  // MaxSecretLen returns the maximum length a secret from this Detector can have.
    92  //
    93  // Since GCP SAK contain an entire PEM-encoded 2048 bit RSA private key, they
    94  // can be pretty long. For now, we use 4 kiB just to be on the safe side.
    95  func (d *Detector) MaxSecretLen() uint32 {
    96  	return 4096
    97  }
    98  
    99  // Detect finds candidate GCP SAK in the data and returns them alongside their
   100  // starting positions.
   101  func (d *Detector) Detect(data []byte) ([]veles.Secret, []int) {
   102  	sJSON, pJSON := d.detectJSON(data)
   103  	sB64, pB64 := d.detectB64(data)
   104  	return append(sJSON, sB64...), append(pJSON, pB64...)
   105  }
   106  
   107  func (d *Detector) detectJSON(data []byte) ([]veles.Secret, []int) {
   108  	var secrets []veles.Secret
   109  	var positions []int
   110  	for _, m := range reJSON.FindAllIndex(data, -1) {
   111  		l, r := m[0], m[1]
   112  		sak := d.extractJSON(data[l:r])
   113  		if sak == nil {
   114  			continue
   115  		}
   116  		secrets = append(secrets, *sak)
   117  		positions = append(positions, l)
   118  	}
   119  	return secrets, positions
   120  }
   121  
   122  func (d *Detector) detectB64(data []byte) ([]veles.Secret, []int) {
   123  	var secrets []veles.Secret
   124  	var positions []int
   125  	for _, m := range reBase64.FindAllIndex(data, -1) {
   126  		l, r := m[0], m[1]
   127  		buf := data[l:r]
   128  		dec := make([]byte, base64.RawStdEncoding.DecodedLen(len(buf)))
   129  		n, err := base64.RawStdEncoding.Decode(dec, buf)
   130  		if err != nil {
   131  			continue
   132  		}
   133  		sak := d.extractJSON(dec[:n])
   134  		if sak == nil {
   135  			continue
   136  		}
   137  		secrets = append(secrets, *sak)
   138  		positions = append(positions, l)
   139  	}
   140  	return secrets, positions
   141  }
   142  
   143  func (d *Detector) extractJSON(data []byte) *GCPSAK {
   144  	kv := d.ex.Extract(data)
   145  	if kv == nil {
   146  		return nil
   147  	}
   148  	sig := Sign(kv["private_key"])
   149  	if sig == nil {
   150  		return nil
   151  	}
   152  	sak := &GCPSAK{
   153  		PrivateKeyID:   kv["private_key_id"],
   154  		ServiceAccount: kv["client_email"],
   155  		Signature:      sig,
   156  	}
   157  	if d.withExtra {
   158  		sak.Extra = &ExtraFields{
   159  			Type:                    kv["type"],
   160  			ProjectID:               kv["project_id"],
   161  			ClientID:                kv["client_id"],
   162  			AuthURI:                 kv["auth_uri"],
   163  			TokenURI:                kv["token_uri"],
   164  			AuthProviderX509CertURL: kv["auth_provider_x509_cert_url"],
   165  			ClientX509CertURL:       kv["client_x509_cert_url"],
   166  			UniverseDomain:          kv["universe_domain"],
   167  		}
   168  		if d.withPrivateKey {
   169  			sak.Extra.PrivateKey = kv["private_key"]
   170  		}
   171  	}
   172  	return sak
   173  }