github.com/anchore/syft@v1.38.2/internal/regex_helpers.go (about) 1 package internal 2 3 import ( 4 "io" 5 "regexp" 6 ) 7 8 const readerChunkSize = 1024 * 1024 9 10 // MatchNamedCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map. 11 // This is only for the first match in the regex. Callers shouldn't be providing regexes with multiple capture groups with the same name. 12 func MatchNamedCaptureGroups(regEx *regexp.Regexp, content string) map[string]string { 13 // note: we are looking across all matches and stopping on the first non-empty match. Why? Take the following example: 14 // input: "cool something to match against" pattern: `((?P<name>match) (?P<version>against))?`. Since the pattern is 15 // encapsulated in an optional capture group, there will be results for each character, but the results will match 16 // on nothing. The only "true" match will be at the end ("match against"). 17 allMatches := regEx.FindAllStringSubmatch(content, -1) 18 var results map[string]string 19 for _, match := range allMatches { 20 // fill a candidate results map with named capture group results, accepting empty values, but not groups with 21 // no names 22 for nameIdx, name := range regEx.SubexpNames() { 23 if nameIdx > len(match) || len(name) == 0 { 24 continue 25 } 26 if results == nil { 27 results = make(map[string]string) 28 } 29 results[name] = match[nameIdx] 30 } 31 // note: since we are looking for the first best potential match we should stop when we find the first one 32 // with non-empty results. 33 if !isEmptyMap(results) { 34 break 35 } 36 } 37 return results 38 } 39 40 // MatchNamedCaptureGroupsFromReader matches named capture groups from a reader, assuming the pattern fits within 41 // 1.5x the reader chunk size (1MB * 1.5). 
42 func MatchNamedCaptureGroupsFromReader(re *regexp.Regexp, r io.Reader) (map[string]string, error) { 43 results := make(map[string]string) 44 matches, err := processReaderInChunks(r, readerChunkSize, matchNamedCaptureGroupsHandler(re, results)) 45 if err != nil { 46 return nil, err 47 } 48 if !matches { 49 return nil, nil 50 } 51 return results, nil 52 } 53 54 // MatchAnyFromReader matches any of the provided regular expressions from a reader, assuming the pattern fits within 55 // 1.5x the reader chunk size (1MB * 1.5). 56 func MatchAnyFromReader(r io.Reader, res ...*regexp.Regexp) (bool, error) { 57 return processReaderInChunks(r, readerChunkSize, matchAnyHandler(res)) 58 } 59 60 func matchNamedCaptureGroupsHandler(re *regexp.Regexp, results map[string]string) func(data []byte) (bool, error) { 61 return func(data []byte) (bool, error) { 62 if match := re.FindSubmatch(data); match != nil { 63 groupNames := re.SubexpNames() 64 for i, name := range groupNames { 65 if i > 0 && name != "" { 66 results[name] = string(match[i]) 67 } 68 } 69 return true, nil 70 } 71 return false, nil 72 } 73 } 74 75 func matchAnyHandler(res []*regexp.Regexp) func(data []byte) (bool, error) { 76 return func(data []byte) (bool, error) { 77 for _, re := range res { 78 if re.Match(data) { 79 return true, nil 80 } 81 } 82 return false, nil 83 } 84 } 85 86 // processReaderInChunks reads from the provided reader in chunks and calls the provided handler with each chunk + portion of the previous neighboring chunk. 87 // Note that we only overlap the last half of the previous chunk with the current chunk to avoid missing matches that span chunk boundaries. 
//
// Each chunk is filled with io.ReadFull, so a reader that hands back short reads still produces full-size windows and
// the bytes carried over from the previous chunk are always contiguous with the newly read data. Only the final window
// may hold fewer than chunkSize new bytes. Consequently any match no longer than chunkSize/2 bytes is seen whole by
// the handler in at least one window.
//
// The handler is invoked once per window; the first window for which it reports a match stops processing and yields
// (true, nil). A handler error, or any read error other than end-of-input, is returned as (false, err). Reaching the
// end of the reader without a match yields (false, nil). chunkSize must be positive.
func processReaderInChunks(rdr io.Reader, chunkSize int, handler func(data []byte) (bool, error)) (bool, error) {
	half := chunkSize / 2

	// layout: buf[:half] holds the tail of the previous chunk, buf[half:] receives the current chunk
	buf := make([]byte, chunkSize+half)

	// number of bytes from the previous chunk that sit directly in front of buf[half]
	carried := 0

	for {
		n, readErr := io.ReadFull(rdr, buf[half:])

		// per the io.Reader contract, bytes returned alongside an error must be processed before the error is
		// considered
		if n > 0 {
			matched, err := handler(buf[half-carried : half+n])
			if err != nil {
				return false, err
			}
			if matched {
				return true, nil
			}
		}

		switch readErr {
		case nil:
			// a full chunk was read: keep its last half as the overlap for the next window
			copy(buf[:half], buf[chunkSize:])
			carried = half
		case io.EOF, io.ErrUnexpectedEOF:
			// io.ReadFull reports end-of-input with exactly these two values: nothing is left to read
			return false, nil
		default:
			return false, readErr
		}
	}
}

// isEmptyMap reports whether m has no entries or holds nothing but empty-string values.
func isEmptyMap(m map[string]string) bool {
	for _, v := range m {
		if v != "" {
			return false
		}
	}
	return true
}