github.com/google/osv-scalibr@v0.4.1/veles/secrets/common/pair/pair.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package pair contains common logic to find secret pairs
    16  package pair
    17  
    18  import (
    19  	"regexp"
    20  	"slices"
    21  
    22  	"github.com/google/osv-scalibr/veles"
    23  )
    24  
    25  // Match contains information about a match
    26  type Match struct {
    27  	Start int
    28  	Value []byte
    29  }
    30  
    31  func (m Match) end() int {
    32  	return m.Start + len(m.Value)
    33  }
    34  
    35  // Pair contains two matches and their distance
    36  type Pair struct {
    37  	A        *Match
    38  	B        *Match
    39  	distance int
    40  }
    41  
    42  var _ veles.Detector = &Detector{}
    43  
    44  // Detector finds instances of a pair of keys
    45  type Detector struct {
    46  	// The maximum length of an element in the pair.
    47  	MaxElementLen uint32
    48  	// MaxDistance sets the maximum distance between the matches.
    49  	MaxDistance uint32
    50  	// FindA is a function that searches for the first element of a pair in the data.
    51  	// It should generally apply stricter matching rules than FindB. Its results are used to:
    52  	//  - filter out overlapping matches (removing conflicting matches from FindB)
    53  	//  - allow early termination if no matches are found.
    54  	FindA func(data []byte) []*Match
    55  	// FindB is a function that searches for the second element of a pair in the data.
    56  	FindB func(data []byte) []*Match
    57  	// Returns a veles.Secret from a Pair.
    58  	// It returns the secret and a boolean indicating success.
    59  	FromPair func(Pair) (veles.Secret, bool)
    60  	// Returns a veles.Secret from a partial Pair.
    61  	// It returns the secret and a boolean indicating success.
    62  	FromPartialPair func(Pair) (veles.Secret, bool)
    63  }
    64  
    65  // Detect implements veles.Detector.
    66  func (d *Detector) Detect(data []byte) ([]veles.Secret, []int) {
    67  	as := d.FindA(data)
    68  	// if FromPartialPair is not provided and no match was found for FindA early exit
    69  	if d.FromPartialPair == nil && len(as) == 0 {
    70  		return nil, nil
    71  	}
    72  	bs := d.FindB(data)
    73  	bs = filterOverlapping(as, bs)
    74  	return findOptimalPairs(as, bs, int(d.MaxDistance), d.FromPair, d.FromPartialPair)
    75  }
    76  
    77  // MaxSecretLen implements veles.Detector.
    78  func (d *Detector) MaxSecretLen() uint32 {
    79  	return d.MaxElementLen*2 + d.MaxDistance
    80  }
    81  
    82  // FindAllMatches returns a function which finds all matches of a given regex.
    83  func FindAllMatches(re *regexp.Regexp) func(data []byte) []*Match {
    84  	return func(data []byte) []*Match {
    85  		matches := re.FindAllSubmatchIndex(data, -1)
    86  		var results []*Match
    87  		for _, m := range matches {
    88  			results = append(results, &Match{
    89  				Start: m[0],
    90  				Value: data[m[0]:m[1]],
    91  			})
    92  		}
    93  		return results
    94  	}
    95  }
    96  
    97  // filterOverlapping filters overlapping matches, it expects both slices to be ordered
    98  // and considers the first to be more important
    99  //
   100  // usage:
   101  //
   102  //	filtered_bs = filterOverlapping(as,bs)
   103  func filterOverlapping(as, bs []*Match) []*Match {
   104  	var filtered []*Match
   105  	aIdx := 0
   106  
   107  	for _, b := range bs {
   108  		// Skip all A matches that end before B starts
   109  		for aIdx < len(as) && as[aIdx].end() <= b.Start {
   110  			aIdx++
   111  		}
   112  		// If B does not overlap the current A, keep it
   113  		if aIdx >= len(as) || b.Start < as[aIdx].Start {
   114  			filtered = append(filtered, b)
   115  		}
   116  	}
   117  	return filtered
   118  }
   119  
   120  // findOptimalPairs finds the best pairing between two sets of matches using a greedy algorithm.
   121  func findOptimalPairs(as, bs []*Match, maxDistance int, fromPair, fromPartialPair func(Pair) (veles.Secret, bool)) ([]veles.Secret, []int) {
   122  	// Find all possible pairings within maxContextLen distance
   123  	possiblePairs := findPossiblePairs(as, bs, maxDistance)
   124  
   125  	// Sort by distance (closest first)
   126  	slices.SortFunc(possiblePairs, func(a, b Pair) int {
   127  		return a.distance - b.distance
   128  	})
   129  
   130  	// Greedily select non-overlapping pairs
   131  	usedA := make(map[*Match]bool)
   132  	usedB := make(map[*Match]bool)
   133  	var secrets []veles.Secret
   134  	var positions []int
   135  
   136  	// select best match
   137  	for _, pair := range possiblePairs {
   138  		if !usedA[pair.A] && !usedB[pair.B] {
   139  			secret, ok := fromPair(pair)
   140  			if !ok {
   141  				continue
   142  			}
   143  			secrets = append(secrets, secret)
   144  			positions = append(positions, min(pair.A.Start, pair.B.Start))
   145  			usedA[pair.A] = true
   146  			usedB[pair.B] = true
   147  		}
   148  	}
   149  
   150  	if fromPartialPair == nil {
   151  		return secrets, positions
   152  	}
   153  
   154  	// leftover handling
   155  	for _, a := range as {
   156  		if !usedA[a] {
   157  			secret, ok := fromPartialPair(Pair{A: a})
   158  			if !ok {
   159  				continue
   160  			}
   161  			secrets = append(secrets, secret)
   162  			positions = append(positions, a.Start)
   163  		}
   164  	}
   165  
   166  	for _, b := range bs {
   167  		if !usedB[b] {
   168  			secret, ok := fromPartialPair(Pair{B: b})
   169  			if !ok {
   170  				continue
   171  			}
   172  			secrets = append(secrets, secret)
   173  			positions = append(positions, b.Start)
   174  		}
   175  	}
   176  
   177  	return secrets, positions
   178  }
   179  
   180  // findPossiblePairs finds all pairs within the maximum context length.
   181  func findPossiblePairs(as, bs []*Match, maxDistance int) []Pair {
   182  	var possiblePairs []Pair
   183  	for _, a := range as {
   184  		for _, b := range bs {
   185  			distance := b.Start - (a.end())
   186  			if a.Start > b.Start {
   187  				distance = a.Start - (b.end())
   188  			}
   189  
   190  			// Skip overlapping matches
   191  			// - hard check to prevent errors
   192  			// - overlapping should be handled before reaching this point
   193  			if distance < 0 {
   194  				continue
   195  			}
   196  
   197  			// Include pair if within maxDistance
   198  			if distance <= maxDistance {
   199  				possiblePairs = append(possiblePairs, Pair{A: a, B: b, distance: distance})
   200  			}
   201  		}
   202  	}
   203  	return possiblePairs
   204  }