github.com/google/osv-scalibr@v0.4.1/veles/secrets/common/pair/pair.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package pair contains common logic to find secret pairs 16 package pair 17 18 import ( 19 "regexp" 20 "slices" 21 22 "github.com/google/osv-scalibr/veles" 23 ) 24 25 // Match contains information about a match 26 type Match struct { 27 Start int 28 Value []byte 29 } 30 31 func (m Match) end() int { 32 return m.Start + len(m.Value) 33 } 34 35 // Pair contains two matches and their distance 36 type Pair struct { 37 A *Match 38 B *Match 39 distance int 40 } 41 42 var _ veles.Detector = &Detector{} 43 44 // Detector finds instances of a pair of keys 45 type Detector struct { 46 // The maximum length of an element in the pair. 47 MaxElementLen uint32 48 // MaxDistance sets the maximum distance between the matches. 49 MaxDistance uint32 50 // FindA is a function that searches for the first element of a pair in the data. 51 // It should generally apply stricter matching rules than FindB. Its results are used to: 52 // - filter out overlapping matches (removing conflicting matches from FindB) 53 // - allow early termination if no matches are found. 54 FindA func(data []byte) []*Match 55 // FindB is a function that searches for the second element of a pair in the data. 56 FindB func(data []byte) []*Match 57 // Returns a veles.Secret from a Pair. 58 // It returns the secret and a boolean indicating success. 59 FromPair func(Pair) (veles.Secret, bool) 60 // Returns a veles.Secret from a partial Pair. 61 // It returns the secret and a boolean indicating success. 62 FromPartialPair func(Pair) (veles.Secret, bool) 63 } 64 65 // Detect implements veles.Detector. 66 func (d *Detector) Detect(data []byte) ([]veles.Secret, []int) { 67 as := d.FindA(data) 68 // if FromPartialPair is not provided and no match was found for FindA early exit 69 if d.FromPartialPair == nil && len(as) == 0 { 70 return nil, nil 71 } 72 bs := d.FindB(data) 73 bs = filterOverlapping(as, bs) 74 return findOptimalPairs(as, bs, int(d.MaxDistance), d.FromPair, d.FromPartialPair) 75 } 76 77 // MaxSecretLen implements veles.Detector. 78 func (d *Detector) MaxSecretLen() uint32 { 79 return d.MaxElementLen*2 + d.MaxDistance 80 } 81 82 // FindAllMatches returns a function which finds all matches of a given regex. 83 func FindAllMatches(re *regexp.Regexp) func(data []byte) []*Match { 84 return func(data []byte) []*Match { 85 matches := re.FindAllSubmatchIndex(data, -1) 86 var results []*Match 87 for _, m := range matches { 88 results = append(results, &Match{ 89 Start: m[0], 90 Value: data[m[0]:m[1]], 91 }) 92 } 93 return results 94 } 95 } 96 97 // filterOverlapping filters overlapping matches, it expects both slices to be ordered 98 // and considers the first to be more important 99 // 100 // usage: 101 // 102 // filtered_bs = filterOverlapping(as,bs) 103 func filterOverlapping(as, bs []*Match) []*Match { 104 var filtered []*Match 105 aIdx := 0 106 107 for _, b := range bs { 108 // Skip all A matches that end before B starts 109 for aIdx < len(as) && as[aIdx].end() <= b.Start { 110 aIdx++ 111 } 112 // If B does not overlap the current A, keep it 113 if aIdx >= len(as) || b.Start < as[aIdx].Start { 114 filtered = append(filtered, b) 115 } 116 } 117 return filtered 118 } 119 120 // findOptimalPairs finds the best pairing between two sets of matches using a greedy algorithm. 121 func findOptimalPairs(as, bs []*Match, maxDistance int, fromPair, fromPartialPair func(Pair) (veles.Secret, bool)) ([]veles.Secret, []int) { 122 // Find all possible pairings within maxContextLen distance 123 possiblePairs := findPossiblePairs(as, bs, maxDistance) 124 125 // Sort by distance (closest first) 126 slices.SortFunc(possiblePairs, func(a, b Pair) int { 127 return a.distance - b.distance 128 }) 129 130 // Greedily select non-overlapping pairs 131 usedA := make(map[*Match]bool) 132 usedB := make(map[*Match]bool) 133 var secrets []veles.Secret 134 var positions []int 135 136 // select best match 137 for _, pair := range possiblePairs { 138 if !usedA[pair.A] && !usedB[pair.B] { 139 secret, ok := fromPair(pair) 140 if !ok { 141 continue 142 } 143 secrets = append(secrets, secret) 144 positions = append(positions, min(pair.A.Start, pair.B.Start)) 145 usedA[pair.A] = true 146 usedB[pair.B] = true 147 } 148 } 149 150 if fromPartialPair == nil { 151 return secrets, positions 152 } 153 154 // leftover handling 155 for _, a := range as { 156 if !usedA[a] { 157 secret, ok := fromPartialPair(Pair{A: a}) 158 if !ok { 159 continue 160 } 161 secrets = append(secrets, secret) 162 positions = append(positions, a.Start) 163 } 164 } 165 166 for _, b := range bs { 167 if !usedB[b] { 168 secret, ok := fromPartialPair(Pair{B: b}) 169 if !ok { 170 continue 171 } 172 secrets = append(secrets, secret) 173 positions = append(positions, b.Start) 174 } 175 } 176 177 return secrets, positions 178 } 179 180 // findPossiblePairs finds all pairs within the maximum context length. 181 func findPossiblePairs(as, bs []*Match, maxDistance int) []Pair { 182 var possiblePairs []Pair 183 for _, a := range as { 184 for _, b := range bs { 185 distance := b.Start - (a.end()) 186 if a.Start > b.Start { 187 distance = a.Start - (b.end()) 188 } 189 190 // Skip overlapping matches 191 // - hard check to prevent errors 192 // - overlapping should be handled before reaching this point 193 if distance < 0 { 194 continue 195 } 196 197 // Include pair if within maxDistance 198 if distance <= maxDistance { 199 possiblePairs = append(possiblePairs, Pair{A: a, B: b, distance: distance}) 200 } 201 } 202 } 203 return possiblePairs 204 }