github.com/m3db/m3@v1.5.0/src/metrics/filters/filter.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package filters
    22  
    23  import (
    24  	"bytes"
    25  	"errors"
    26  	"fmt"
    27  
    28  	"github.com/m3db/m3/src/metrics/metric/id"
    29  )
    30  
    31  var (
    32  	errInvalidFilterPattern                  = errors.New("invalid filter pattern defined")
    33  	allowAllFilter               filter      = allowFilter{}
    34  	singleAnyCharFilterForwards  chainFilter = &singleAnyCharFilter{backwards: false}
    35  	singleAnyCharFilterBackwards chainFilter = &singleAnyCharFilter{backwards: true}
    36  )
    37  
    38  // LogicalOp is a logical operator.
    39  type LogicalOp string
    40  
    41  // chainSegment is the part of the pattern that the chain represents.
    42  type chainSegment int
    43  
    44  // A list of supported logical operators.
    45  const (
    46  	// Conjunction is logical AND.
    47  	Conjunction LogicalOp = "&&"
    48  	// Disjunction is logical OR.
    49  	Disjunction LogicalOp = "||"
    50  
    51  	middle chainSegment = iota
    52  	start
    53  	end
    54  
    55  	wildcardChar         = '*'
    56  	negationChar         = '!'
    57  	singleAnyChar        = '?'
    58  	singleRangeStartChar = '['
    59  	singleRangeEndChar   = ']'
    60  	rangeChar            = '-'
    61  	multiRangeStartChar  = '{'
    62  	multiRangeEndChar    = '}'
    63  	invalidNestedChars   = "?[{"
    64  )
    65  
    66  var (
    67  	multiRangeSplit = []byte(",")
    68  )
    69  
    70  // FilterValue contains the filter pattern and a boolean flag indicating
    71  // whether the filter should be negated.
    72  type FilterValue struct {
    73  	Pattern string
    74  	Negate  bool
    75  }
    76  
    77  // Filter matches a string against certain conditions.
    78  type Filter interface {
    79  	filter
    80  
    81  	Clone() Filter
    82  }
    83  
    84  // TagsFilter matches a string of tags against certain conditions.
    85  type TagsFilter interface {
    86  	// Matches returns true if the conditions are met.
    87  	Matches(val []byte, opts TagMatchOptions) (bool, error)
    88  }
    89  
    90  // TagMatchOptions are the options for a TagsFilter match.
    91  type TagMatchOptions struct {
    92  	// Function to extract name and tags from an id.
    93  	NameAndTagsFn id.NameAndTagsFn
    94  
    95  	// Function to get a sorted tag iterator from id tags.
    96  	// The caller of Matches is the owner of the Iterator and is responsible for closing it, this allows reusing the
    97  	// same Iterator across many Matches.
    98  	SortedTagIteratorFn id.SortedTagIteratorFn
    99  }
   100  
   101  type filter interface {
   102  	fmt.Stringer
   103  
   104  	// Matches returns true if the conditions are met.
   105  	Matches(val []byte) bool
   106  }
   107  
   108  // NewFilterFromFilterValue creates a filter from the given filter value.
   109  func NewFilterFromFilterValue(fv FilterValue) (Filter, error) {
   110  	f, err := NewFilter([]byte(fv.Pattern))
   111  	if err != nil {
   112  		return nil, err
   113  	}
   114  	if !fv.Negate {
   115  		return f, nil
   116  	}
   117  	return newNegationFilter(f), nil
   118  }
   119  
   120  // NewFilter supports startsWith, endsWith, contains and a single wildcard
   121  // along with negation and glob matching support.
   122  // NOTE: Currently only supports ASCII matching and has zero compatibility
   123  // with UTF8 so you should make sure all matches are done against ASCII only.
   124  func NewFilter(pattern []byte) (Filter, error) {
   125  	// TODO(martinm): Provide more detailed error messages.
   126  	if len(pattern) == 0 {
   127  		return newEqualityFilter(pattern), nil
   128  	}
   129  
   130  	if pattern[0] != negationChar {
   131  		return newWildcardFilter(pattern)
   132  	}
   133  
   134  	if len(pattern) == 1 {
   135  		// Only negation symbol.
   136  		return nil, errInvalidFilterPattern
   137  	}
   138  
   139  	filter, err := newWildcardFilter(pattern[1:])
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  
   144  	return newNegationFilter(filter), nil
   145  }
   146  
   147  // newWildcardFilter creates a filter that segments the pattern based
   148  // on wildcards, creating a rangeFilter for each segment.
   149  func newWildcardFilter(pattern []byte) (Filter, error) {
   150  	wIdx := bytes.IndexRune(pattern, wildcardChar)
   151  
   152  	if wIdx == -1 {
   153  		// No wildcards.
   154  		return newRangeFilter(pattern, false, middle)
   155  	}
   156  
   157  	if len(pattern) == 1 {
   158  		// Whole thing is wildcard.
   159  		return newAllowFilter(), nil
   160  	}
   161  
   162  	if wIdx == len(pattern)-1 {
   163  		// Single wildcard at end.
   164  		return newRangeFilter(pattern[:len(pattern)-1], false, start)
   165  	}
   166  
   167  	secondWIdx := bytes.IndexRune(pattern[wIdx+1:], wildcardChar)
   168  	if secondWIdx == -1 {
   169  		if wIdx == 0 {
   170  			// Single wildcard at start.
   171  			return newRangeFilter(pattern[1:], true, end)
   172  		}
   173  
   174  		// Single wildcard in the middle.
   175  		first, err := newRangeFilter(pattern[:wIdx], false, start)
   176  		if err != nil {
   177  			return nil, err
   178  		}
   179  
   180  		second, err := newRangeFilter(pattern[wIdx+1:], true, end)
   181  		if err != nil {
   182  			return nil, err
   183  		}
   184  
   185  		return NewMultiFilter([]Filter{first, second}, Conjunction), nil
   186  	}
   187  
   188  	if wIdx == 0 && secondWIdx == len(pattern)-2 && len(pattern) > 2 {
   189  		// Wildcard at beginning and end.
   190  		return newContainsFilter(pattern[1 : len(pattern)-1])
   191  	}
   192  
   193  	return nil, errInvalidFilterPattern
   194  }
   195  
   196  // newRangeFilter creates a filter that checks for ranges (? or [] or {}) and segments
   197  // the pattern into a multiple chain filters based on ranges found.
   198  func newRangeFilter(pattern []byte, backwards bool, seg chainSegment) (Filter, error) {
   199  	var filters []chainFilter
   200  	eqIdx := -1
   201  	for i := 0; i < len(pattern); i++ {
   202  		if pattern[i] == singleRangeStartChar {
   203  			// Found '[', create an equality filter for the chars before this one if any
   204  			// and use vals before next ']' as input for a singleRangeFilter.
   205  			if eqIdx != -1 {
   206  				filters = append(filters, newEqualityChainFilter(pattern[eqIdx:i], backwards))
   207  				eqIdx = -1
   208  			}
   209  
   210  			endIdx := bytes.IndexRune(pattern[i+1:], singleRangeEndChar)
   211  			if endIdx == -1 {
   212  				return nil, errInvalidFilterPattern
   213  			}
   214  
   215  			f, err := newSingleRangeFilter(pattern[i+1:i+1+endIdx], backwards)
   216  			if err != nil {
   217  				return nil, errInvalidFilterPattern
   218  			}
   219  
   220  			filters = append(filters, f)
   221  			i += endIdx + 1
   222  		} else if pattern[i] == multiRangeStartChar {
   223  			// Found '{', create equality filter for chars before this if any and then
   224  			// use vals before next '}' to create multiCharRange filter.
   225  			if eqIdx != -1 {
   226  				filters = append(filters, newEqualityChainFilter(pattern[eqIdx:i], backwards))
   227  				eqIdx = -1
   228  			}
   229  
   230  			endIdx := bytes.IndexRune(pattern[i+1:], multiRangeEndChar)
   231  			if endIdx == -1 {
   232  				return nil, errInvalidFilterPattern
   233  			}
   234  
   235  			f, err := newMultiCharSequenceFilter(pattern[i+1:i+1+endIdx], backwards)
   236  			if err != nil {
   237  				return nil, errInvalidFilterPattern
   238  			}
   239  
   240  			filters = append(filters, f)
   241  			i += endIdx + 1
   242  		} else if pattern[i] == singleAnyChar {
   243  			// Found '?', create equality filter for chars before this one if any and then
   244  			// attach singleAnyCharFilter to chain.
   245  			if eqIdx != -1 {
   246  				filters = append(filters, newEqualityChainFilter(pattern[eqIdx:i], backwards))
   247  				eqIdx = -1
   248  			}
   249  
   250  			filters = append(filters, newSingleAnyCharFilter(backwards))
   251  		} else if eqIdx == -1 {
   252  			// Normal char, need to mark index to start next equality filter.
   253  			eqIdx = i
   254  		}
   255  	}
   256  
   257  	if eqIdx != -1 {
   258  		filters = append(filters, newEqualityChainFilter(pattern[eqIdx:], backwards))
   259  	}
   260  
   261  	return newMultiChainFilter(filters, seg, backwards), nil
   262  }
   263  
   264  // allowFilter is a filter that allows all.
   265  type allowFilter struct{}
   266  
   267  func newAllowFilter() Filter                  { return newImmutableFilter(allowAllFilter) }
   268  func (f allowFilter) String() string          { return "All" }
   269  func (f allowFilter) Matches(val []byte) bool { return true }
   270  
   271  // equalityFilter is a filter that matches exact values.
   272  type equalityFilter struct {
   273  	pattern []byte
   274  }
   275  
   276  func newEqualityFilter(pattern []byte) Filter {
   277  	return newImmutableFilter(&equalityFilter{pattern: pattern})
   278  }
   279  
   280  func (f *equalityFilter) String() string {
   281  	return "Equals(\"" + string(f.pattern) + "\")"
   282  }
   283  
   284  func (f *equalityFilter) Matches(val []byte) bool {
   285  	return bytes.Equal(f.pattern, val)
   286  }
   287  
   288  // containsFilter is a filter that performs contains matches.
   289  type containsFilter struct {
   290  	pattern []byte
   291  }
   292  
   293  func newContainsFilter(pattern []byte) (Filter, error) {
   294  	if bytes.ContainsAny(pattern, invalidNestedChars) {
   295  		return nil, errInvalidFilterPattern
   296  	}
   297  
   298  	return newImmutableFilter(&containsFilter{pattern: pattern}), nil
   299  }
   300  
   301  func (f *containsFilter) String() string {
   302  	return "Contains(\"" + string(f.pattern) + "\")"
   303  }
   304  
   305  func (f *containsFilter) Matches(val []byte) bool {
   306  	return bytes.Contains(val, f.pattern)
   307  }
   308  
   309  // negationFilter is a filter that matches the opposite of the provided filter.
   310  type negationFilter struct {
   311  	filter Filter
   312  }
   313  
   314  func newNegationFilter(filter Filter) Filter {
   315  	return newImmutableFilter(&negationFilter{filter: filter})
   316  }
   317  
   318  func (f *negationFilter) String() string {
   319  	return "Not(" + f.filter.String() + ")"
   320  }
   321  
   322  func (f *negationFilter) Matches(val []byte) bool {
   323  	return !f.filter.Matches(val)
   324  }
   325  
   326  // multiFilter chains multiple filters together with a logicalOp.
   327  type multiFilter struct {
   328  	filters []Filter
   329  	op      LogicalOp
   330  }
   331  
   332  // NewMultiFilter returns a filter that chains multiple filters together
   333  // using a LogicalOp.
   334  func NewMultiFilter(filters []Filter, op LogicalOp) Filter {
   335  	return newImmutableFilter(&multiFilter{filters: filters, op: op})
   336  }
   337  
   338  func (f *multiFilter) String() string {
   339  	separator := " " + string(f.op) + " "
   340  	var buf bytes.Buffer
   341  	numFilters := len(f.filters)
   342  	for i := 0; i < numFilters; i++ {
   343  		buf.WriteString(f.filters[i].String())
   344  		if i < numFilters-1 {
   345  			buf.WriteString(separator)
   346  		}
   347  	}
   348  	return buf.String()
   349  }
   350  
   351  func (f *multiFilter) Matches(val []byte) bool {
   352  	if len(f.filters) == 0 {
   353  		return true
   354  	}
   355  
   356  	for _, filter := range f.filters {
   357  		match := filter.Matches(val)
   358  		if f.op == Conjunction && !match {
   359  			return false
   360  		}
   361  
   362  		if f.op == Disjunction && match {
   363  			return true
   364  		}
   365  	}
   366  
   367  	return f.op == Conjunction
   368  }
   369  
   370  // chainFilter matches an input string against certain conditions
   371  // while returning the unmatched part of the input if there is a match.
   372  type chainFilter interface {
   373  	fmt.Stringer
   374  
   375  	matches(val []byte) ([]byte, bool)
   376  }
   377  
   378  // equalityChainFilter is a filter that performs equality string matches
   379  // from either the front or back of the string.
   380  type equalityChainFilter struct {
   381  	pattern   []byte
   382  	backwards bool
   383  }
   384  
   385  func newEqualityChainFilter(pattern []byte, backwards bool) chainFilter {
   386  	return &equalityChainFilter{pattern: pattern, backwards: backwards}
   387  }
   388  
   389  func (f *equalityChainFilter) String() string {
   390  	return "Equals(\"" + string(f.pattern) + "\")"
   391  }
   392  
   393  func (f *equalityChainFilter) matches(val []byte) ([]byte, bool) {
   394  	if f.backwards && bytes.HasSuffix(val, f.pattern) {
   395  		return val[:len(val)-len(f.pattern)], true
   396  	}
   397  
   398  	if !f.backwards && bytes.HasPrefix(val, f.pattern) {
   399  		return val[len(f.pattern):], true
   400  	}
   401  
   402  	return nil, false
   403  }
   404  
   405  // singleAnyCharFilter is a filter that allows any one char.
   406  type singleAnyCharFilter struct {
   407  	backwards bool
   408  }
   409  
   410  func newSingleAnyCharFilter(backwards bool) chainFilter {
   411  	if backwards {
   412  		return singleAnyCharFilterBackwards
   413  	}
   414  
   415  	return singleAnyCharFilterForwards
   416  }
   417  
   418  func (f *singleAnyCharFilter) String() string { return "AnyChar" }
   419  
   420  func (f *singleAnyCharFilter) matches(val []byte) ([]byte, bool) {
   421  	if len(val) == 0 {
   422  		return nil, false
   423  	}
   424  
   425  	if f.backwards {
   426  		return val[:len(val)-1], true
   427  	}
   428  
   429  	return val[1:], true
   430  }
   431  
   432  // newSingleRangeFilter creates a filter that performs range matching
   433  // on a single char.
   434  func newSingleRangeFilter(pattern []byte, backwards bool) (chainFilter, error) {
   435  	if len(pattern) == 0 {
   436  		return nil, errInvalidFilterPattern
   437  	}
   438  
   439  	negate := false
   440  	if pattern[0] == negationChar {
   441  		negate = true
   442  		pattern = pattern[1:]
   443  	}
   444  
   445  	if len(pattern) > 1 && pattern[1] == rangeChar {
   446  		// If there is a '-' char at position 2, look for repeated instances
   447  		// of a-z.
   448  		if len(pattern)%3 != 0 {
   449  			return nil, errInvalidFilterPattern
   450  		}
   451  
   452  		patterns := make([][]byte, 0, len(pattern)%3)
   453  		for i := 0; i < len(pattern); i += 3 {
   454  			if pattern[i+1] != rangeChar || pattern[i] > pattern[i+2] {
   455  				return nil, errInvalidFilterPattern
   456  			}
   457  
   458  			patterns = append(patterns, pattern[i:i+3])
   459  		}
   460  
   461  		return &singleRangeFilter{patterns: patterns, backwards: backwards, negate: negate}, nil
   462  	}
   463  
   464  	return &singleCharSetFilter{pattern: pattern, backwards: backwards, negate: negate}, nil
   465  }
   466  
   467  // singleRangeFilter is a filter that performs a single character match against
   468  // a range of chars given in a range format eg. [a-z].
   469  type singleRangeFilter struct {
   470  	patterns  [][]byte
   471  	backwards bool
   472  	negate    bool
   473  }
   474  
   475  func (f *singleRangeFilter) String() string {
   476  	var negatePrefix, negateSuffix string
   477  	if f.negate {
   478  		negatePrefix = "Not("
   479  		negateSuffix = ")"
   480  	}
   481  
   482  	return negatePrefix + "Range(\"" +
   483  		string(bytes.Join(f.patterns, []byte(fmt.Sprintf(" %s ", Disjunction)))) +
   484  		"\")" + negateSuffix
   485  }
   486  
   487  func (f *singleRangeFilter) matches(val []byte) ([]byte, bool) {
   488  	if len(val) == 0 {
   489  		return nil, false
   490  	}
   491  
   492  	match := false
   493  	idx := 0
   494  	remainder := val[1:]
   495  	if f.backwards {
   496  		idx = len(val) - 1
   497  		remainder = val[:idx]
   498  	}
   499  
   500  	for _, pattern := range f.patterns {
   501  		if val[idx] >= pattern[0] && val[idx] <= pattern[2] {
   502  			match = true
   503  			break
   504  		}
   505  	}
   506  
   507  	if f.negate {
   508  		match = !match
   509  	}
   510  
   511  	return remainder, match
   512  }
   513  
   514  // singleCharSetFilter is a filter that performs a single character match against
   515  // a set of chars given explicitly eg. [abcdefg].
   516  type singleCharSetFilter struct {
   517  	pattern   []byte
   518  	backwards bool
   519  	negate    bool
   520  }
   521  
   522  func (f *singleCharSetFilter) String() string {
   523  	var negatePrefix, negateSuffix string
   524  	if f.negate {
   525  		negatePrefix = "Not("
   526  		negateSuffix = ")"
   527  	}
   528  
   529  	return negatePrefix + "Range(\"" + string(f.pattern) + "\")" + negateSuffix
   530  }
   531  
   532  func (f *singleCharSetFilter) matches(val []byte) ([]byte, bool) {
   533  	if len(val) == 0 {
   534  		return nil, false
   535  	}
   536  
   537  	match := false
   538  	for i := 0; i < len(f.pattern); i++ {
   539  		if f.backwards && val[len(val)-1] == f.pattern[i] {
   540  			match = true
   541  			break
   542  		}
   543  
   544  		if !f.backwards && val[0] == f.pattern[i] {
   545  			match = true
   546  			break
   547  		}
   548  	}
   549  
   550  	if f.negate {
   551  		match = !match
   552  	}
   553  
   554  	if f.backwards {
   555  		return val[:len(val)-1], match
   556  	}
   557  
   558  	return val[1:], match
   559  }
   560  
   561  // multiCharRangeFilter is a filter that performs matches against multiple sets of chars
   562  // eg. {abc,defg}.
   563  type multiCharSequenceFilter struct {
   564  	patterns  [][]byte
   565  	backwards bool
   566  }
   567  
   568  func newMultiCharSequenceFilter(patterns []byte, backwards bool) (chainFilter, error) {
   569  	if len(patterns) == 0 {
   570  		return nil, errInvalidFilterPattern
   571  	}
   572  
   573  	return &multiCharSequenceFilter{
   574  		patterns:  bytes.Split(patterns, multiRangeSplit),
   575  		backwards: backwards,
   576  	}, nil
   577  }
   578  
   579  func (f *multiCharSequenceFilter) String() string {
   580  	return "Range(\"" + string(bytes.Join(f.patterns, multiRangeSplit)) + "\")"
   581  }
   582  
   583  func (f *multiCharSequenceFilter) matches(val []byte) ([]byte, bool) {
   584  	if len(val) == 0 {
   585  		return nil, false
   586  	}
   587  
   588  	for _, pattern := range f.patterns {
   589  		if f.backwards && bytes.HasSuffix(val, pattern) {
   590  			return val[:len(val)-len(pattern)], true
   591  		}
   592  
   593  		if !f.backwards && bytes.HasPrefix(val, pattern) {
   594  			return val[len(pattern):], true
   595  		}
   596  	}
   597  
   598  	return nil, false
   599  }
   600  
   601  // multiChainFilter chains multiple chainFilters together with &&.
   602  type multiChainFilter struct {
   603  	filters   []chainFilter
   604  	seg       chainSegment
   605  	backwards bool
   606  }
   607  
   608  // newMultiChainFilter creates a new multiChainFilter from given chainFilters.
   609  func newMultiChainFilter(filters []chainFilter, seg chainSegment, backwards bool) Filter {
   610  	return newImmutableFilter(&multiChainFilter{filters: filters, seg: seg, backwards: backwards})
   611  }
   612  
   613  func (f *multiChainFilter) String() string {
   614  	separator := " then "
   615  	var buf bytes.Buffer
   616  	switch f.seg {
   617  	case start:
   618  		buf.WriteString("StartsWith(")
   619  	case end:
   620  		buf.WriteString("EndsWith(")
   621  	}
   622  
   623  	numFilters := len(f.filters)
   624  	for i := 0; i < numFilters; i++ {
   625  		buf.WriteString(f.filters[i].String())
   626  		if i < numFilters-1 {
   627  			buf.WriteString(separator)
   628  		}
   629  	}
   630  
   631  	switch f.seg {
   632  	case start, end:
   633  		buf.WriteString(")")
   634  	}
   635  
   636  	return buf.String()
   637  }
   638  
   639  func (f *multiChainFilter) Matches(val []byte) bool {
   640  	if len(f.filters) == 0 {
   641  		return true
   642  	}
   643  
   644  	var match bool
   645  
   646  	if f.backwards {
   647  		for i := len(f.filters) - 1; i >= 0; i-- {
   648  			val, match = f.filters[i].matches(val)
   649  			if !match {
   650  				return false
   651  			}
   652  		}
   653  	} else {
   654  		for i := 0; i < len(f.filters); i++ {
   655  			val, match = f.filters[i].matches(val)
   656  			if !match {
   657  				return false
   658  			}
   659  		}
   660  	}
   661  
   662  	if f.seg == middle && len(val) != 0 {
   663  		// chain was middle segment and some value was left over at end of chain.
   664  		return false
   665  	}
   666  
   667  	return true
   668  }
   669  
   670  type immutableFilter struct {
   671  	f filter
   672  }
   673  
   674  func newImmutableFilter(f filter) Filter {
   675  	return immutableFilter{f: f}
   676  }
   677  
   678  func (f immutableFilter) String() string {
   679  	return f.f.String()
   680  }
   681  
   682  func (f immutableFilter) Matches(val []byte) bool {
   683  	return f.f.Matches(val)
   684  }
   685  
   686  func (f immutableFilter) Clone() Filter {
   687  	return f
   688  }