github.com/google/osv-scalibr@v0.4.1/veles/secrets/tinkkeyset/detector.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tinkkeyset
    16  
    17  import (
    18  	"bytes"
    19  	"encoding/base64"
    20  	"regexp"
    21  
    22  	"github.com/google/osv-scalibr/veles"
    23  
    24  	"github.com/tink-crypto/tink-go/v2/insecurecleartextkeyset"
    25  	"github.com/tink-crypto/tink-go/v2/keyset"
    26  )
    27  
var (
	// base64Pattern is a generic pattern to detect base64 blobs: a run of at
	// least 20 characters from the standard base64 alphabet, optionally
	// followed by up to two '=' padding characters.
	base64Pattern = regexp.MustCompile(`[A-Za-z0-9+/]{20,}=?=?`)

	// jsonPattern matches JSON-encoded Tink keysets. It anchors on the known
	// `{"primaryKeyId": <digits>, "key": [` start and lazily extends up to the
	// first `]` that is followed (modulo whitespace) by `}`. The (?s) flag lets
	// `.` span newlines so pretty-printed keysets are matched as well.
	jsonPattern = regexp.MustCompile(`(?s)\s*\{\s*"primaryKeyId"\s*:\s*\d+,\s*"key"\s*:\s*\[\s*.*?\]\s*\}`)

	// tinkTypeURL can be found in both binary and JSON Tink keyset encodings;
	// it is used as a cheap pre-filter before attempting to parse a buffer.
	tinkTypeURL = []byte("type.googleapis.com/google.crypto.tink")

	// minBase64Len is an estimate used to reduce the number of blobs to decode:
	// base64 matches shorter than this are skipped without being decoded.
	// Note that len(base64(tinkTypeURL)) is roughly 50 chars.
	minBase64Len = 60
)
    44  
// Detector is a Veles Detector that finds Tink plaintext keysets.
//
// The struct has no fields, so a Detector carries no state of its own.
type Detector struct{}
    47  
    48  // NewDetector returns a new Veles Detector that finds Tink plain text keysets
    49  func NewDetector() veles.Detector {
    50  	return &Detector{}
    51  }
    52  
    53  // MaxSecretLen returns a conservative upper bound for the size of a secret in bytes.
    54  // An exact number cannot be returned because Tink keysets may have arbitrary lengths,
    55  func (d *Detector) MaxSecretLen() uint32 {
    56  	return 128 * 1 << 10 // 128 KiB
    57  }
    58  
    59  // Detect finds Tink plain text keysets in the given data
    60  func (d *Detector) Detect(data []byte) ([]veles.Secret, []int) {
    61  	res := []veles.Secret{}
    62  	pos := []int{}
    63  
    64  	// search for secrets inside base64 blobs
    65  	for _, m := range base64Pattern.FindAllIndex(data, -1) {
    66  		l, r := m[0], m[1]
    67  		if (r - l) < minBase64Len {
    68  			continue
    69  		}
    70  
    71  		decoded := make([]byte, base64.StdEncoding.DecodedLen(r-l))
    72  		n, err := base64.StdEncoding.Decode(decoded, data[l:r])
    73  		if err != nil || !bytes.Contains(decoded[:n], tinkTypeURL) {
    74  			continue
    75  		}
    76  
    77  		b64Found, _ := find(decoded[:n])
    78  		// use the start of the base
    79  		for _, found := range b64Found {
    80  			res = append(res, found)
    81  			pos = append(pos, l)
    82  		}
    83  	}
    84  
    85  	// search for plain secrets
    86  	if !bytes.Contains(data, tinkTypeURL) {
    87  		return res, nil
    88  	}
    89  
    90  	plainFound, plainPos := find(data)
    91  	res = append(res, plainFound...)
    92  	pos = append(pos, plainPos...)
    93  
    94  	return res, pos
    95  }
    96  
    97  func find(buf []byte) ([]veles.Secret, []int) {
    98  	res, pos := findJSON(buf)
    99  	if len(res) != 0 {
   100  		return res, pos
   101  	}
   102  	return findBinary(buf)
   103  }
   104  
   105  // findBinary extract at most one binary encoded Tink keyset inside the provided buffer
   106  //
   107  // this function works only if the input is exactly a binary encoded Tink keyset
   108  func findBinary(buf []byte) ([]veles.Secret, []int) {
   109  	hnd, err := insecurecleartextkeyset.Read(keyset.NewBinaryReader(bytes.NewBuffer(buf)))
   110  	if err != nil {
   111  		return nil, nil
   112  	}
   113  	// Valid binary keyset found, convert it to a JSON string for consistent output.
   114  	bufOut := new(bytes.Buffer)
   115  	if err := insecurecleartextkeyset.Write(hnd, keyset.NewJSONWriter(bufOut)); err != nil {
   116  		return nil, nil
   117  	}
   118  	return []veles.Secret{TinkKeySet{Content: bufOut.String()}}, []int{0}
   119  }
   120  
   121  // findJSON searches for json encoded Tink keyset and extracts them
   122  func findJSON(buf []byte) ([]veles.Secret, []int) {
   123  	res := []veles.Secret{}
   124  	pos := []int{}
   125  	cleaned := clean(buf)
   126  	for _, m := range jsonPattern.FindAllIndex(cleaned, -1) {
   127  		l, r := m[0], m[1]
   128  		jsonBuf := cleaned[l:r]
   129  		hnd, err := insecurecleartextkeyset.Read(keyset.NewJSONReader(bytes.NewBuffer(jsonBuf)))
   130  		if err != nil {
   131  			continue
   132  		}
   133  		// Valid keyset found, convert it back to a canonical JSON string for consistent output.
   134  		bufOut := new(bytes.Buffer)
   135  		if err := insecurecleartextkeyset.Write(hnd, keyset.NewJSONWriter(bufOut)); err != nil {
   136  			return nil, nil
   137  		}
   138  		res = append(res, TinkKeySet{Content: bufOut.String()})
   139  		pos = append(pos, l)
   140  	}
   141  	return res, pos
   142  }
   143  
   144  // clean removes all levels of escaping from a given buffer by eliminating every backslash character.
   145  //
   146  // This function is designed specifically for this detector's purpose and
   147  // should not be used if your output is expected to contain backslashes
   148  func clean(s []byte) []byte {
   149  	if len(s) == 0 {
   150  		return s
   151  	}
   152  	var b bytes.Buffer
   153  	skip := false
   154  	for i := range len(s) - 1 {
   155  		if skip {
   156  			skip = false
   157  			continue
   158  		}
   159  		c := s[i]
   160  		if c == '\\' {
   161  			if s[i+1] == 'n' {
   162  				b.WriteByte('\n')
   163  				skip = true
   164  			}
   165  			continue
   166  		}
   167  		b.WriteByte(c)
   168  	}
   169  	if !skip && s[len(s)-1] != '\\' {
   170  		b.WriteByte(s[len(s)-1])
   171  	}
   172  	return b.Bytes()
   173  }