github.com/google/osv-scalibr@v0.4.1/veles/secrets/gcpsak/detector.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gcpsak 16 17 import ( 18 "encoding/base64" 19 "regexp" 20 21 "github.com/google/osv-scalibr/veles" 22 "github.com/google/osv-scalibr/veles/secrets/common/flatjson" 23 ) 24 25 var ( 26 // Matches on "auth_provider_x509_cert_url" somewhere between curly braces. 27 // This matches very broadly and gives us some vague "JSON context" that we 28 // can further refine on. 29 reJSON = regexp.MustCompile(`\{[^{]+auth_provider_x509_cert_url[^}]+\}`) 30 31 // Matches on a base64-encoded GCP service account key (in pretty JSON 32 // format). This relies on consistent field order and whitespace. 33 // That should be a reasonable heuristic. Note that this means we will miss 34 // (false negatives) base64-encoded GCP SAK that have been modified before 35 // encoding. 36 reBase64 = regexp.MustCompile(`ewogICJ0eXBlIjogInNlcnZpY2VfYWNjb3VudCIs[a-zA-Z0-9/+]{52,}`) 37 38 // requiredKeys are the keys that must be present in a candidate's JSON 39 // representation for the Detector to produce a result. 40 requiredKeys = []string{"private_key_id", "private_key", "client_email"} 41 42 // optionalKeys are not required to be present but are reported as part of a 43 // GCPSAK if they are. 44 optionalKeys = []string{ 45 "type", 46 "project_id", 47 "client_id", 48 "auth_uri", 49 "token_uri", 50 "auth_provider_x509_cert_url", 51 "client_x509_cert_url", 52 "universe_domain", 53 } 54 ) 55 56 var _ veles.Detector = NewDetector() 57 58 // Detector is a Veles Detector that finds GCP service account keys. 59 // 60 // It can find GCP SAK in plain JSON format (even if it's modified or escaped) 61 // and base64 encoded but only if it wasn't modified before encoding. 62 type Detector struct { 63 ex *flatjson.Extractor 64 withExtra bool 65 withPrivateKey bool 66 } 67 68 // NewDetector returns a new Veles Detector that finds GCP service account keys. 69 func NewDetector() *Detector { 70 return &Detector{ 71 ex: flatjson.NewExtractor(requiredKeys, nil), 72 withExtra: false, 73 } 74 } 75 76 // NewDetectorWithExtraFields returns a new Veles Detector that finds GCP 77 // service account keys and returns them with all their fields not just those 78 // needed for validation. 79 // 80 // If includePrivateKey is set, the result will also contain the raw private 81 // key. This should generally be avoided because it creates the risk of 82 // accidentally leaking the key. 83 func NewDetectorWithExtraFields(includePrivateKey bool) *Detector { 84 return &Detector{ 85 ex: flatjson.NewExtractor(requiredKeys, optionalKeys), 86 withExtra: true, 87 withPrivateKey: includePrivateKey, 88 } 89 } 90 91 // MaxSecretLen returns the maximum length a secret from this Detector can have. 92 // 93 // Since GCP SAK contain an entire PEM-encoded 2048 bit RSA private key, they 94 // can be pretty long. For now, we use 4 kiB just to be on the safe side. 95 func (d *Detector) MaxSecretLen() uint32 { 96 return 4096 97 } 98 99 // Detect finds candidate GCP SAK in the data and returns them alongside their 100 // starting positions. 101 func (d *Detector) Detect(data []byte) ([]veles.Secret, []int) { 102 sJSON, pJSON := d.detectJSON(data) 103 sB64, pB64 := d.detectB64(data) 104 return append(sJSON, sB64...), append(pJSON, pB64...) 105 } 106 107 func (d *Detector) detectJSON(data []byte) ([]veles.Secret, []int) { 108 var secrets []veles.Secret 109 var positions []int 110 for _, m := range reJSON.FindAllIndex(data, -1) { 111 l, r := m[0], m[1] 112 sak := d.extractJSON(data[l:r]) 113 if sak == nil { 114 continue 115 } 116 secrets = append(secrets, *sak) 117 positions = append(positions, l) 118 } 119 return secrets, positions 120 } 121 122 func (d *Detector) detectB64(data []byte) ([]veles.Secret, []int) { 123 var secrets []veles.Secret 124 var positions []int 125 for _, m := range reBase64.FindAllIndex(data, -1) { 126 l, r := m[0], m[1] 127 buf := data[l:r] 128 dec := make([]byte, base64.RawStdEncoding.DecodedLen(len(buf))) 129 n, err := base64.RawStdEncoding.Decode(dec, buf) 130 if err != nil { 131 continue 132 } 133 sak := d.extractJSON(dec[:n]) 134 if sak == nil { 135 continue 136 } 137 secrets = append(secrets, *sak) 138 positions = append(positions, l) 139 } 140 return secrets, positions 141 } 142 143 func (d *Detector) extractJSON(data []byte) *GCPSAK { 144 kv := d.ex.Extract(data) 145 if kv == nil { 146 return nil 147 } 148 sig := Sign(kv["private_key"]) 149 if sig == nil { 150 return nil 151 } 152 sak := &GCPSAK{ 153 PrivateKeyID: kv["private_key_id"], 154 ServiceAccount: kv["client_email"], 155 Signature: sig, 156 } 157 if d.withExtra { 158 sak.Extra = &ExtraFields{ 159 Type: kv["type"], 160 ProjectID: kv["project_id"], 161 ClientID: kv["client_id"], 162 AuthURI: kv["auth_uri"], 163 TokenURI: kv["token_uri"], 164 AuthProviderX509CertURL: kv["auth_provider_x509_cert_url"], 165 ClientX509CertURL: kv["client_x509_cert_url"], 166 UniverseDomain: kv["universe_domain"], 167 } 168 if d.withPrivateKey { 169 sak.Extra.PrivateKey = kv["private_key"] 170 } 171 } 172 return sak 173 }