github.com/netdata/go.d.plugin@v0.58.1/pkg/matcher/glob.go (about)

     1  // SPDX-License-Identifier: GPL-3.0-or-later
     2  
     3  package matcher
     4  
     5  import (
     6  	"path/filepath"
     7  	"regexp"
     8  	"unicode/utf8"
     9  
    10  	"errors"
    11  )
    12  
// globMatcher implements Matcher using glob-style pattern matching.
// The underlying string is the glob pattern itself; matching is performed by
// the globMatch method.
type globMatcher string
    15  
var (
	// errBadGlobPattern is returned by NewGlobMatcher for an expression that
	// erGlobPattern rejects.
	errBadGlobPattern = errors.New("bad glob pattern")
	// erGlobPattern accepts only well-formed glob expressions: any sequence of
	// '*' or '?', bracketed character classes (optionally negated with '^' and
	// holding at least one plain character, backslash-escaped character or
	// x-y range), backslash-escaped characters, and ordinary characters.
	// The (?s) flag lets '.' match a newline as well.
	erGlobPattern     = regexp.MustCompile(`(?s)^(?:[*?]|\[\^?([^\\-\]]|\\.|.-.)+\]|\\.|[^\*\?\\\[])*$`)
)
    20  
    21  // NewGlobMatcher create a new matcher with glob format
    22  func NewGlobMatcher(expr string) (Matcher, error) {
    23  	switch expr {
    24  	case "":
    25  		return stringFullMatcher(""), nil
    26  	case "*":
    27  		return TRUE(), nil
    28  	}
    29  
    30  	// any strings pass this regexp check are valid pattern
    31  	if !erGlobPattern.MatchString(expr) {
    32  		return nil, errBadGlobPattern
    33  	}
    34  
    35  	size := len(expr)
    36  	chars := []rune(expr)
    37  	startWith := true
    38  	endWith := true
    39  	startIdx := 0
    40  	endIdx := size - 1
    41  	if chars[startIdx] == '*' {
    42  		startWith = false
    43  		startIdx = 1
    44  	}
    45  	if chars[endIdx] == '*' {
    46  		endWith = false
    47  		endIdx--
    48  	}
    49  
    50  	unescapedExpr := make([]rune, 0, endIdx-startIdx+1)
    51  	for i := startIdx; i <= endIdx; i++ {
    52  		ch := chars[i]
    53  		if ch == '\\' {
    54  			nextCh := chars[i+1]
    55  			unescapedExpr = append(unescapedExpr, nextCh)
    56  			i++
    57  		} else if isGlobMeta(ch) {
    58  			return globMatcher(expr), nil
    59  		} else {
    60  			unescapedExpr = append(unescapedExpr, ch)
    61  		}
    62  	}
    63  
    64  	return NewStringMatcher(string(unescapedExpr), startWith, endWith)
    65  }
    66  
    67  func isGlobMeta(ch rune) bool {
    68  	switch ch {
    69  	case '*', '?', '[':
    70  		return true
    71  	default:
    72  		return false
    73  	}
    74  }
    75  
    76  // Match matches.
    77  func (m globMatcher) Match(b []byte) bool {
    78  	return m.MatchString(string(b))
    79  }
    80  
    81  // MatchString matches.
    82  func (m globMatcher) MatchString(line string) bool {
    83  	rs, _ := m.globMatch(line)
    84  	return rs
    85  }
    86  
    87  func (m globMatcher) globMatch(name string) (matched bool, err error) {
    88  	pattern := string(m)
    89  Pattern:
    90  	for len(pattern) > 0 {
    91  		var star bool
    92  		var chunk string
    93  		star, chunk, pattern = scanChunk(pattern)
    94  		if star && chunk == "" {
    95  			// Trailing * matches rest of string unless it has a /.
    96  			// return !strings.Contains(name, string(Separator)), nil
    97  
    98  			return true, nil
    99  		}
   100  		// Look for match at current position.
   101  		t, ok, err := matchChunk(chunk, name)
   102  		// if we're the last chunk, make sure we've exhausted the name
   103  		// otherwise we'll give a false result even if we could still match
   104  		// using the star
   105  		if ok && (len(t) == 0 || len(pattern) > 0) {
   106  			name = t
   107  			continue
   108  		}
   109  		if err != nil {
   110  			return false, err
   111  		}
   112  		if star {
   113  			// Look for match skipping i+1 bytes.
   114  			// Cannot skip /.
   115  			for i := 0; i < len(name); i++ {
   116  				//for i := 0; i < len(name) && name[i] != Separator; i++ {
   117  				t, ok, err := matchChunk(chunk, name[i+1:])
   118  				if ok {
   119  					// if we're the last chunk, make sure we exhausted the name
   120  					if len(pattern) == 0 && len(t) > 0 {
   121  						continue
   122  					}
   123  					name = t
   124  					continue Pattern
   125  				}
   126  				if err != nil {
   127  					return false, err
   128  				}
   129  			}
   130  		}
   131  		return false, nil
   132  	}
   133  	return len(name) == 0, nil
   134  }
   135  
   136  // scanChunk gets the next segment of pattern, which is a non-star string
   137  // possibly preceded by a star.
   138  func scanChunk(pattern string) (star bool, chunk, rest string) {
   139  	for len(pattern) > 0 && pattern[0] == '*' {
   140  		pattern = pattern[1:]
   141  		star = true
   142  	}
   143  	inrange := false
   144  	var i int
   145  Scan:
   146  	for i = 0; i < len(pattern); i++ {
   147  		switch pattern[i] {
   148  		case '\\':
   149  			if i+1 < len(pattern) {
   150  				i++
   151  			}
   152  		case '[':
   153  			inrange = true
   154  		case ']':
   155  			inrange = false
   156  		case '*':
   157  			if !inrange {
   158  				break Scan
   159  			}
   160  		}
   161  	}
   162  	return star, pattern[0:i], pattern[i:]
   163  }
   164  
   165  // matchChunk checks whether chunk matches the beginning of s.
   166  // If so, it returns the remainder of s (after the match).
   167  // Chunk is all single-character operators: literals, char classes, and ?.
   168  func matchChunk(chunk, s string) (rest string, ok bool, err error) {
   169  	for len(chunk) > 0 {
   170  		if len(s) == 0 {
   171  			return
   172  		}
   173  		switch chunk[0] {
   174  		case '[':
   175  			// character class
   176  			r, n := utf8.DecodeRuneInString(s)
   177  			s = s[n:]
   178  			chunk = chunk[1:]
   179  			// We can't end right after '[', we're expecting at least
   180  			// a closing bracket and possibly a caret.
   181  			if len(chunk) == 0 {
   182  				err = filepath.ErrBadPattern
   183  				return
   184  			}
   185  			// possibly negated
   186  			negated := chunk[0] == '^'
   187  			if negated {
   188  				chunk = chunk[1:]
   189  			}
   190  			// parse all ranges
   191  			match := false
   192  			nrange := 0
   193  			for {
   194  				if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 {
   195  					chunk = chunk[1:]
   196  					break
   197  				}
   198  				var lo, hi rune
   199  				if lo, chunk, err = getEsc(chunk); err != nil {
   200  					return
   201  				}
   202  				hi = lo
   203  				if chunk[0] == '-' {
   204  					if hi, chunk, err = getEsc(chunk[1:]); err != nil {
   205  						return
   206  					}
   207  				}
   208  				if lo <= r && r <= hi {
   209  					match = true
   210  				}
   211  				nrange++
   212  			}
   213  			if match == negated {
   214  				return
   215  			}
   216  
   217  		case '?':
   218  			//if s[0] == Separator {
   219  			//	return
   220  			//}
   221  			_, n := utf8.DecodeRuneInString(s)
   222  			s = s[n:]
   223  			chunk = chunk[1:]
   224  
   225  		case '\\':
   226  			chunk = chunk[1:]
   227  			if len(chunk) == 0 {
   228  				err = filepath.ErrBadPattern
   229  				return
   230  			}
   231  			fallthrough
   232  
   233  		default:
   234  			if chunk[0] != s[0] {
   235  				return
   236  			}
   237  			s = s[1:]
   238  			chunk = chunk[1:]
   239  		}
   240  	}
   241  	return s, true, nil
   242  }
   243  
   244  // getEsc gets a possibly-escaped character from chunk, for a character class.
   245  func getEsc(chunk string) (r rune, nchunk string, err error) {
   246  	if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
   247  		err = filepath.ErrBadPattern
   248  		return
   249  	}
   250  	if chunk[0] == '\\' {
   251  		chunk = chunk[1:]
   252  		if len(chunk) == 0 {
   253  			err = filepath.ErrBadPattern
   254  			return
   255  		}
   256  	}
   257  	r, n := utf8.DecodeRuneInString(chunk)
   258  	if r == utf8.RuneError && n == 1 {
   259  		err = filepath.ErrBadPattern
   260  	}
   261  	nchunk = chunk[n:]
   262  	if len(nchunk) == 0 {
   263  		err = filepath.ErrBadPattern
   264  	}
   265  	return
   266  }