github.com/google/osv-scalibr@v0.4.1/veles/secrets/tinkkeyset/detector.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tinkkeyset 16 17 import ( 18 "bytes" 19 "encoding/base64" 20 "regexp" 21 22 "github.com/google/osv-scalibr/veles" 23 24 "github.com/tink-crypto/tink-go/v2/insecurecleartextkeyset" 25 "github.com/tink-crypto/tink-go/v2/keyset" 26 ) 27 28 var ( 29 30 // base64Pattern is a generic pattern to detect base64 blobs 31 base64Pattern = regexp.MustCompile(`[A-Za-z0-9+/]{20,}=?=?`) 32 33 // jsonPattern matches correctly Tink keyset json strings 34 // thanks to the known `{"primaryKeyId":` start and `]}` ending 35 jsonPattern = regexp.MustCompile(`(?s)\s*\{\s*"primaryKeyId"\s*:\s*\d+,\s*"key"\s*:\s*\[\s*.*?\]\s*\}`) 36 37 // tinkTypeURL can be found in both binary and json tink keyset encodings 38 tinkTypeURL = []byte("type.googleapis.com/google.crypto.tink") 39 40 // minBase64Len is an estimate to reduce the number of blobs to decode 41 // note that: len(base64(tinkTypeUrl)) is roughly 50 chars 42 minBase64Len = 60 43 ) 44 45 // Detector is a Veles Detector that finds Tink plaintext keysets. 46 type Detector struct{} 47 48 // NewDetector returns a new Veles Detector that finds Tink plain text keysets 49 func NewDetector() veles.Detector { 50 return &Detector{} 51 } 52 53 // MaxSecretLen returns a conservative upper bound for the size of a secret in bytes. 54 // An exact number cannot be returned because Tink keysets may have arbitrary lengths, 55 func (d *Detector) MaxSecretLen() uint32 { 56 return 128 * 1 << 10 // 128 KiB 57 } 58 59 // Detect finds Tink plain text keysets in the given data 60 func (d *Detector) Detect(data []byte) ([]veles.Secret, []int) { 61 res := []veles.Secret{} 62 pos := []int{} 63 64 // search for secrets inside base64 blobs 65 for _, m := range base64Pattern.FindAllIndex(data, -1) { 66 l, r := m[0], m[1] 67 if (r - l) < minBase64Len { 68 continue 69 } 70 71 decoded := make([]byte, base64.StdEncoding.DecodedLen(r-l)) 72 n, err := base64.StdEncoding.Decode(decoded, data[l:r]) 73 if err != nil || !bytes.Contains(decoded[:n], tinkTypeURL) { 74 continue 75 } 76 77 b64Found, _ := find(decoded[:n]) 78 // use the start of the base 79 for _, found := range b64Found { 80 res = append(res, found) 81 pos = append(pos, l) 82 } 83 } 84 85 // search for plain secrets 86 if !bytes.Contains(data, tinkTypeURL) { 87 return res, nil 88 } 89 90 plainFound, plainPos := find(data) 91 res = append(res, plainFound...) 92 pos = append(pos, plainPos...) 93 94 return res, pos 95 } 96 97 func find(buf []byte) ([]veles.Secret, []int) { 98 res, pos := findJSON(buf) 99 if len(res) != 0 { 100 return res, pos 101 } 102 return findBinary(buf) 103 } 104 105 // findBinary extract at most one binary encoded Tink keyset inside the provided buffer 106 // 107 // this function works only if the input is exactly a binary encoded Tink keyset 108 func findBinary(buf []byte) ([]veles.Secret, []int) { 109 hnd, err := insecurecleartextkeyset.Read(keyset.NewBinaryReader(bytes.NewBuffer(buf))) 110 if err != nil { 111 return nil, nil 112 } 113 // Valid binary keyset found, convert it to a JSON string for consistent output. 114 bufOut := new(bytes.Buffer) 115 if err := insecurecleartextkeyset.Write(hnd, keyset.NewJSONWriter(bufOut)); err != nil { 116 return nil, nil 117 } 118 return []veles.Secret{TinkKeySet{Content: bufOut.String()}}, []int{0} 119 } 120 121 // findJSON searches for json encoded Tink keyset and extracts them 122 func findJSON(buf []byte) ([]veles.Secret, []int) { 123 res := []veles.Secret{} 124 pos := []int{} 125 cleaned := clean(buf) 126 for _, m := range jsonPattern.FindAllIndex(cleaned, -1) { 127 l, r := m[0], m[1] 128 jsonBuf := cleaned[l:r] 129 hnd, err := insecurecleartextkeyset.Read(keyset.NewJSONReader(bytes.NewBuffer(jsonBuf))) 130 if err != nil { 131 continue 132 } 133 // Valid keyset found, convert it back to a canonical JSON string for consistent output. 134 bufOut := new(bytes.Buffer) 135 if err := insecurecleartextkeyset.Write(hnd, keyset.NewJSONWriter(bufOut)); err != nil { 136 return nil, nil 137 } 138 res = append(res, TinkKeySet{Content: bufOut.String()}) 139 pos = append(pos, l) 140 } 141 return res, pos 142 } 143 144 // clean removes all levels of escaping from a given buffer by eliminating every backslash character. 145 // 146 // This function is designed specifically for this detector's purpose and 147 // should not be used if your output is expected to contain backslashes 148 func clean(s []byte) []byte { 149 if len(s) == 0 { 150 return s 151 } 152 var b bytes.Buffer 153 skip := false 154 for i := range len(s) - 1 { 155 if skip { 156 skip = false 157 continue 158 } 159 c := s[i] 160 if c == '\\' { 161 if s[i+1] == 'n' { 162 b.WriteByte('\n') 163 skip = true 164 } 165 continue 166 } 167 b.WriteByte(c) 168 } 169 if !skip && s[len(s)-1] != '\\' { 170 b.WriteByte(s[len(s)-1]) 171 } 172 return b.Bytes() 173 }