github.com/google/osv-scalibr@v0.4.1/veles/secrets/common/flatjson/flatjson.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package flatjson contains facilities to extract credentials that are
    16  // expressed as a single (flat) JSON object whose values are all strings.
    17  //
    18  // This can be used to extract GCP service account keys, GCP API keys, etc.
    19  package flatjson
    20  
    21  import (
    22  	"regexp"
    23  	"strings"
    24  )
    25  
    26  const (
    27  	// DefaultMaxMatches is the default value for an Extractor's MaxMatches. It is
    28  	// used to limit the number of matches to constrain runtime.
    29  	DefaultMaxMatches = 20
    30  )
    31  
    32  var (
    33  	// Matches on top-level JSON string fields at arbitrary levels of escaping.
    34  	// This can be used to reliably extract the fields of a GCP SAK from something
    35  	// matched by reJSON at a higher sensitivity than relying on Go's JSON
    36  	// parsing.
    37  	reExtract = regexp.MustCompile(`[\\"]*"([^"]*)[\\"]*":\s*[\\"]*"([^"]*)[\\"]*",?`)
    38  )
    39  
// Extractor extracts key-value pairs with required or optional keys from an
// input. It assumes that the key-value pairs are contained in a flat JSON
// object, as is the case for e.g. GCP service account keys.
//
// Extractors are meant to be created with NewExtractor, which populates the
// unexported fields and sets MaxMatches to DefaultMaxMatches.
type Extractor struct {
	// keysRequired contains all the keys whose values the Extractor should
	// extract from the input.
	// If, for a given key k, keysRequired[k] is true, the key is required: if
	// it's absent from the result or empty, a nil result is returned. Keys for
	// which keysRequired[k] is false are contained in the result if they are
	// present in the input but don't cause a nil result if they are missing.
	keysRequired map[string]bool

	// numRequired is the number of required keys that were passed to
	// NewExtractor. Extract uses it as the initial capacity of its result map.
	numRequired int

	// MaxMatches limits the number of matches to constrain runtime. NewExtractor
	// sets it to DefaultMaxMatches (20), but it can be set arbitrarily, even to
	// -1, in which case no limit is applied.
	//
	// The value is handed to regexp's FindAllSubmatch as the match limit, so a
	// value of 0 results in no matches at all rather than in the default limit.
	//
	// For example, although we're expecting only about 10 fields for GCP service
	// account keys, it makes sense to have a slightly larger limit, because
	// fields like "universe" get added to the key or people might add their own
	// metadata to the JSON structure.
	MaxMatches int
}
    64  
    65  // NewExtractor creates an Extractor that can be used to extract flat-JSON
    66  // key-value pairs from an input. The requiredKeys take precedence; if a key is
    67  // present in both requiredKeys and optionalKeys (although it really shouldn't
    68  // be), it's considered required.
    69  func NewExtractor(requiredKeys []string, optionalKeys []string) *Extractor {
    70  	e := &Extractor{
    71  		keysRequired: make(map[string]bool, len(requiredKeys)+len(optionalKeys)),
    72  		numRequired:  len(requiredKeys),
    73  		MaxMatches:   DefaultMaxMatches,
    74  	}
    75  	for _, k := range optionalKeys {
    76  		e.keysRequired[k] = false
    77  	}
    78  	for _, k := range requiredKeys {
    79  		e.keysRequired[k] = true
    80  	}
    81  	return e
    82  }
    83  
    84  // Extract extracts the required and optional keys alongside their values from
    85  // the flat JSON object contained in data.
    86  func (e *Extractor) Extract(data []byte) map[string]string {
    87  	kv := make(map[string]string, e.numRequired)
    88  	subs := reExtract.FindAllSubmatch(data, e.MaxMatches)
    89  	for _, sub := range subs {
    90  		key := clean(sub[1])
    91  		if _, want := e.keysRequired[key]; !want {
    92  			continue
    93  		}
    94  		kv[key] = clean(sub[2])
    95  	}
    96  	for key, required := range e.keysRequired {
    97  		if !required {
    98  			continue
    99  		}
   100  		value, present := kv[key]
   101  		if !present || value == "" {
   102  			return nil
   103  		}
   104  	}
   105  	return kv
   106  }
   107  
   108  // clean removes all levels of escaping from a string containing a flat (one
   109  // level deep) JSON object that can be escaped arbitrarily often.
   110  func clean(s []byte) string {
   111  	if len(s) == 0 {
   112  		return ""
   113  	}
   114  	var b strings.Builder
   115  	skip := false
   116  	for i := range len(s) - 1 {
   117  		if skip {
   118  			skip = false
   119  			continue
   120  		}
   121  		c := s[i]
   122  		if c == '\\' {
   123  			if s[i+1] == 'n' {
   124  				b.WriteByte('\n')
   125  				skip = true
   126  			}
   127  			continue
   128  		}
   129  		b.WriteByte(c)
   130  	}
   131  	if !skip && s[len(s)-1] != '\\' {
   132  		b.WriteByte(s[len(s)-1])
   133  	}
   134  	return b.String()
   135  }