github.com/netdata/go.d.plugin@v0.58.1/pkg/matcher/glob.go (about) 1 // SPDX-License-Identifier: GPL-3.0-or-later 2 3 package matcher 4 5 import ( 6 "path/filepath" 7 "regexp" 8 "unicode/utf8" 9 10 "errors" 11 ) 12 13 // globMatcher implements Matcher, it uses filepath.MatchString to match. 14 type globMatcher string 15 16 var ( 17 errBadGlobPattern = errors.New("bad glob pattern") 18 erGlobPattern = regexp.MustCompile(`(?s)^(?:[*?]|\[\^?([^\\-\]]|\\.|.-.)+\]|\\.|[^\*\?\\\[])*$`) 19 ) 20 21 // NewGlobMatcher create a new matcher with glob format 22 func NewGlobMatcher(expr string) (Matcher, error) { 23 switch expr { 24 case "": 25 return stringFullMatcher(""), nil 26 case "*": 27 return TRUE(), nil 28 } 29 30 // any strings pass this regexp check are valid pattern 31 if !erGlobPattern.MatchString(expr) { 32 return nil, errBadGlobPattern 33 } 34 35 size := len(expr) 36 chars := []rune(expr) 37 startWith := true 38 endWith := true 39 startIdx := 0 40 endIdx := size - 1 41 if chars[startIdx] == '*' { 42 startWith = false 43 startIdx = 1 44 } 45 if chars[endIdx] == '*' { 46 endWith = false 47 endIdx-- 48 } 49 50 unescapedExpr := make([]rune, 0, endIdx-startIdx+1) 51 for i := startIdx; i <= endIdx; i++ { 52 ch := chars[i] 53 if ch == '\\' { 54 nextCh := chars[i+1] 55 unescapedExpr = append(unescapedExpr, nextCh) 56 i++ 57 } else if isGlobMeta(ch) { 58 return globMatcher(expr), nil 59 } else { 60 unescapedExpr = append(unescapedExpr, ch) 61 } 62 } 63 64 return NewStringMatcher(string(unescapedExpr), startWith, endWith) 65 } 66 67 func isGlobMeta(ch rune) bool { 68 switch ch { 69 case '*', '?', '[': 70 return true 71 default: 72 return false 73 } 74 } 75 76 // Match matches. 77 func (m globMatcher) Match(b []byte) bool { 78 return m.MatchString(string(b)) 79 } 80 81 // MatchString matches. 
82 func (m globMatcher) MatchString(line string) bool { 83 rs, _ := m.globMatch(line) 84 return rs 85 } 86 87 func (m globMatcher) globMatch(name string) (matched bool, err error) { 88 pattern := string(m) 89 Pattern: 90 for len(pattern) > 0 { 91 var star bool 92 var chunk string 93 star, chunk, pattern = scanChunk(pattern) 94 if star && chunk == "" { 95 // Trailing * matches rest of string unless it has a /. 96 // return !strings.Contains(name, string(Separator)), nil 97 98 return true, nil 99 } 100 // Look for match at current position. 101 t, ok, err := matchChunk(chunk, name) 102 // if we're the last chunk, make sure we've exhausted the name 103 // otherwise we'll give a false result even if we could still match 104 // using the star 105 if ok && (len(t) == 0 || len(pattern) > 0) { 106 name = t 107 continue 108 } 109 if err != nil { 110 return false, err 111 } 112 if star { 113 // Look for match skipping i+1 bytes. 114 // Cannot skip /. 115 for i := 0; i < len(name); i++ { 116 //for i := 0; i < len(name) && name[i] != Separator; i++ { 117 t, ok, err := matchChunk(chunk, name[i+1:]) 118 if ok { 119 // if we're the last chunk, make sure we exhausted the name 120 if len(pattern) == 0 && len(t) > 0 { 121 continue 122 } 123 name = t 124 continue Pattern 125 } 126 if err != nil { 127 return false, err 128 } 129 } 130 } 131 return false, nil 132 } 133 return len(name) == 0, nil 134 } 135 136 // scanChunk gets the next segment of pattern, which is a non-star string 137 // possibly preceded by a star. 
func scanChunk(pattern string) (star bool, chunk, rest string) {
	// Any run of leading stars collapses into a single flag.
	lead := 0
	for lead < len(pattern) && pattern[lead] == '*' {
		lead++
	}
	star = lead > 0
	pattern = pattern[lead:]

	inClass := false
	for pos := 0; pos < len(pattern); pos++ {
		switch c := pattern[pos]; {
		case c == '\\':
			// Step over the escaped byte, if there is one.
			if pos+1 < len(pattern) {
				pos++
			}
		case c == '[':
			inClass = true
		case c == ']':
			inClass = false
		case c == '*' && !inClass:
			// An unescaped star outside a character class ends the chunk.
			return star, pattern[:pos], pattern[pos:]
		}
	}
	return star, pattern, ""
}

// matchChunk checks whether chunk matches the beginning of s.
// On success it returns the unmatched remainder of s and ok == true.
// On a plain mismatch it returns "", false, nil; a malformed chunk yields
// "", false and a non-nil error.
// A chunk contains only single-character operators: literals (optionally
// backslash-escaped), character classes and '?'.
func matchChunk(chunk, s string) (rest string, ok bool, err error) {
	for chunk != "" {
		if s == "" {
			// Pattern left over but nothing left to match it against.
			return "", false, nil
		}

		switch head := chunk[0]; head {
		case '?':
			// Any single rune.
			_, width := utf8.DecodeRuneInString(s)
			s, chunk = s[width:], chunk[1:]

		case '[':
			// Character class: consume one rune of s and test it against
			// every range listed up to the closing bracket.
			r, width := utf8.DecodeRuneInString(s)
			s, chunk = s[width:], chunk[1:]
			// Something must follow '[': at least a closing bracket, and
			// possibly a caret.
			if chunk == "" {
				return "", false, filepath.ErrBadPattern
			}
			negated := false
			if chunk[0] == '^' {
				negated = true
				chunk = chunk[1:]
			}

			inSet := false
			for seen := 0; ; seen++ {
				// ']' closes the class only after at least one range.
				if seen > 0 && len(chunk) > 0 && chunk[0] == ']' {
					chunk = chunk[1:]
					break
				}
				lo, tail, escErr := getEsc(chunk)
				if escErr != nil {
					return "", false, escErr
				}
				hi := lo
				if tail[0] == '-' {
					if hi, tail, escErr = getEsc(tail[1:]); escErr != nil {
						return "", false, escErr
					}
				}
				chunk = tail
				if lo <= r && r <= hi {
					inSet = true
				}
			}
			if inSet == negated {
				return "", false, nil
			}

		default:
			if head == '\\' {
				// Drop the backslash and compare the escaped byte literally.
				chunk = chunk[1:]
				if chunk == "" {
					return "", false, filepath.ErrBadPattern
				}
			}
			if chunk[0] != s[0] {
				return "", false, nil
			}
			s, chunk = s[1:], chunk[1:]
		}
	}
	return s, true, nil
}

// getEsc gets a possibly-escaped character from chunk, for a character class.
// It returns the decoded rune and the text following it. The error is
// filepath.ErrBadPattern when chunk is empty, starts with '-' or ']', ends
// right after a backslash, holds an invalid UTF-8 byte at the decoded
// position, or has nothing after the decoded rune.
func getEsc(chunk string) (r rune, nchunk string, err error) {
	if chunk == "" {
		return 0, "", filepath.ErrBadPattern
	}
	switch chunk[0] {
	case '-', ']':
		return 0, "", filepath.ErrBadPattern
	case '\\':
		chunk = chunk[1:]
		if chunk == "" {
			return 0, "", filepath.ErrBadPattern
		}
	}

	var width int
	r, width = utf8.DecodeRuneInString(chunk)
	nchunk = chunk[width:]
	// The rune and remainder are still returned alongside the error.
	if nchunk == "" || (r == utf8.RuneError && width == 1) {
		err = filepath.ErrBadPattern
	}
	return r, nchunk, err
}