// Source: github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/metrics/filters/filter.go

// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package filters

import (
	"bytes"
	"errors"
	"fmt"

	"github.com/m3db/m3/src/metrics/metric/id"
)

var (
	// errInvalidFilterPattern is the error returned by the constructors in
	// this file when a pattern cannot be parsed.
	errInvalidFilterPattern = errors.New("invalid filter pattern defined")
	// allowAllFilter is the shared allowFilter value wrapped by newAllowFilter.
	allowAllFilter filter = allowFilter{}
	// singleAnyCharFilterForwards and singleAnyCharFilterBackwards are the
	// shared instances handed out by newSingleAnyCharFilter, one per direction.
	singleAnyCharFilterForwards  chainFilter = &singleAnyCharFilter{backwards: false}
	singleAnyCharFilterBackwards chainFilter = &singleAnyCharFilter{backwards: true}
)

// LogicalOp is a logical operator.
type LogicalOp string

// chainSegment is the part of the pattern that the chain represents.
type chainSegment int

// A list of supported logical operators.
const (
	// Conjunction is logical AND.
	Conjunction LogicalOp = "&&"
	// Disjunction is logical OR.
	Disjunction LogicalOp = "||"

	// middle, start and end identify which part of a pattern a chain covers.
	// NOTE: iota counts from the top of this const block, so middle is 2,
	// start is 3 and end is 4; the zero chainSegment value is none of them.
	middle chainSegment = iota
	start
	end

	// Special characters recognized while parsing filter patterns.
	wildcardChar         = '*'
	negationChar         = '!'
	singleAnyChar        = '?'
	singleRangeStartChar = '['
	singleRangeEndChar   = ']'
	rangeChar            = '-'
	multiRangeStartChar  = '{'
	multiRangeEndChar    = '}'
	// invalidNestedChars lists the characters newContainsFilter rejects
	// inside a contains pattern.
	invalidNestedChars = "?[{"
)

var (
	// multiRangeSplit separates the alternatives listed inside a {} pattern.
	multiRangeSplit = []byte(",")
)

// FilterValue contains the filter pattern and a boolean flag indicating
// whether the filter should be negated.
type FilterValue struct {
	Pattern string
	Negate  bool
}

// Filter matches a string against certain conditions.
type Filter interface {
	filter

	// Clone returns a Filter; the immutableFilter implementation in this
	// file returns itself.
	Clone() Filter
}

// TagsFilter matches a string of tags against certain conditions.
type TagsFilter interface {
	// Matches returns true if the conditions are met.
	Matches(val []byte, opts TagMatchOptions) (bool, error)
}

// TagMatchOptions are the options for a TagsFilter match.
type TagMatchOptions struct {
	// Function to extract name and tags from an id.
	NameAndTagsFn id.NameAndTagsFn

	// Function to get a sorted tag iterator from id tags.
	// The caller of Matches is the owner of the Iterator and is responsible for closing it, this allows reusing the
	// same Iterator across many Matches.
	SortedTagIteratorFn id.SortedTagIteratorFn
}

// filter is the unexported matching contract embedded by Filter.
type filter interface {
	fmt.Stringer

	// Matches returns true if the conditions are met.
	Matches(val []byte) bool
}

// NewFilterFromFilterValue creates a filter from the given filter value.
109 func NewFilterFromFilterValue(fv FilterValue) (Filter, error) { 110 f, err := NewFilter([]byte(fv.Pattern)) 111 if err != nil { 112 return nil, err 113 } 114 if !fv.Negate { 115 return f, nil 116 } 117 return newNegationFilter(f), nil 118 } 119 120 // NewFilter supports startsWith, endsWith, contains and a single wildcard 121 // along with negation and glob matching support. 122 // NOTE: Currently only supports ASCII matching and has zero compatibility 123 // with UTF8 so you should make sure all matches are done against ASCII only. 124 func NewFilter(pattern []byte) (Filter, error) { 125 // TODO(martinm): Provide more detailed error messages. 126 if len(pattern) == 0 { 127 return newEqualityFilter(pattern), nil 128 } 129 130 if pattern[0] != negationChar { 131 return newWildcardFilter(pattern) 132 } 133 134 if len(pattern) == 1 { 135 // Only negation symbol. 136 return nil, errInvalidFilterPattern 137 } 138 139 filter, err := newWildcardFilter(pattern[1:]) 140 if err != nil { 141 return nil, err 142 } 143 144 return newNegationFilter(filter), nil 145 } 146 147 // newWildcardFilter creates a filter that segments the pattern based 148 // on wildcards, creating a rangeFilter for each segment. 149 func newWildcardFilter(pattern []byte) (Filter, error) { 150 wIdx := bytes.IndexRune(pattern, wildcardChar) 151 152 if wIdx == -1 { 153 // No wildcards. 154 return newRangeFilter(pattern, false, middle) 155 } 156 157 if len(pattern) == 1 { 158 // Whole thing is wildcard. 159 return newAllowFilter(), nil 160 } 161 162 if wIdx == len(pattern)-1 { 163 // Single wildcard at end. 164 return newRangeFilter(pattern[:len(pattern)-1], false, start) 165 } 166 167 secondWIdx := bytes.IndexRune(pattern[wIdx+1:], wildcardChar) 168 if secondWIdx == -1 { 169 if wIdx == 0 { 170 // Single wildcard at start. 171 return newRangeFilter(pattern[1:], true, end) 172 } 173 174 // Single wildcard in the middle. 
175 first, err := newRangeFilter(pattern[:wIdx], false, start) 176 if err != nil { 177 return nil, err 178 } 179 180 second, err := newRangeFilter(pattern[wIdx+1:], true, end) 181 if err != nil { 182 return nil, err 183 } 184 185 return NewMultiFilter([]Filter{first, second}, Conjunction), nil 186 } 187 188 if wIdx == 0 && secondWIdx == len(pattern)-2 && len(pattern) > 2 { 189 // Wildcard at beginning and end. 190 return newContainsFilter(pattern[1 : len(pattern)-1]) 191 } 192 193 return nil, errInvalidFilterPattern 194 } 195 196 // newRangeFilter creates a filter that checks for ranges (? or [] or {}) and segments 197 // the pattern into a multiple chain filters based on ranges found. 198 func newRangeFilter(pattern []byte, backwards bool, seg chainSegment) (Filter, error) { 199 var filters []chainFilter 200 eqIdx := -1 201 for i := 0; i < len(pattern); i++ { 202 if pattern[i] == singleRangeStartChar { 203 // Found '[', create an equality filter for the chars before this one if any 204 // and use vals before next ']' as input for a singleRangeFilter. 205 if eqIdx != -1 { 206 filters = append(filters, newEqualityChainFilter(pattern[eqIdx:i], backwards)) 207 eqIdx = -1 208 } 209 210 endIdx := bytes.IndexRune(pattern[i+1:], singleRangeEndChar) 211 if endIdx == -1 { 212 return nil, errInvalidFilterPattern 213 } 214 215 f, err := newSingleRangeFilter(pattern[i+1:i+1+endIdx], backwards) 216 if err != nil { 217 return nil, errInvalidFilterPattern 218 } 219 220 filters = append(filters, f) 221 i += endIdx + 1 222 } else if pattern[i] == multiRangeStartChar { 223 // Found '{', create equality filter for chars before this if any and then 224 // use vals before next '}' to create multiCharRange filter. 
225 if eqIdx != -1 { 226 filters = append(filters, newEqualityChainFilter(pattern[eqIdx:i], backwards)) 227 eqIdx = -1 228 } 229 230 endIdx := bytes.IndexRune(pattern[i+1:], multiRangeEndChar) 231 if endIdx == -1 { 232 return nil, errInvalidFilterPattern 233 } 234 235 f, err := newMultiCharSequenceFilter(pattern[i+1:i+1+endIdx], backwards) 236 if err != nil { 237 return nil, errInvalidFilterPattern 238 } 239 240 filters = append(filters, f) 241 i += endIdx + 1 242 } else if pattern[i] == singleAnyChar { 243 // Found '?', create equality filter for chars before this one if any and then 244 // attach singleAnyCharFilter to chain. 245 if eqIdx != -1 { 246 filters = append(filters, newEqualityChainFilter(pattern[eqIdx:i], backwards)) 247 eqIdx = -1 248 } 249 250 filters = append(filters, newSingleAnyCharFilter(backwards)) 251 } else if eqIdx == -1 { 252 // Normal char, need to mark index to start next equality filter. 253 eqIdx = i 254 } 255 } 256 257 if eqIdx != -1 { 258 filters = append(filters, newEqualityChainFilter(pattern[eqIdx:], backwards)) 259 } 260 261 return newMultiChainFilter(filters, seg, backwards), nil 262 } 263 264 // allowFilter is a filter that allows all. 265 type allowFilter struct{} 266 267 func newAllowFilter() Filter { return newImmutableFilter(allowAllFilter) } 268 func (f allowFilter) String() string { return "All" } 269 func (f allowFilter) Matches(val []byte) bool { return true } 270 271 // equalityFilter is a filter that matches exact values. 272 type equalityFilter struct { 273 pattern []byte 274 } 275 276 func newEqualityFilter(pattern []byte) Filter { 277 return newImmutableFilter(&equalityFilter{pattern: pattern}) 278 } 279 280 func (f *equalityFilter) String() string { 281 return "Equals(\"" + string(f.pattern) + "\")" 282 } 283 284 func (f *equalityFilter) Matches(val []byte) bool { 285 return bytes.Equal(f.pattern, val) 286 } 287 288 // containsFilter is a filter that performs contains matches. 
289 type containsFilter struct { 290 pattern []byte 291 } 292 293 func newContainsFilter(pattern []byte) (Filter, error) { 294 if bytes.ContainsAny(pattern, invalidNestedChars) { 295 return nil, errInvalidFilterPattern 296 } 297 298 return newImmutableFilter(&containsFilter{pattern: pattern}), nil 299 } 300 301 func (f *containsFilter) String() string { 302 return "Contains(\"" + string(f.pattern) + "\")" 303 } 304 305 func (f *containsFilter) Matches(val []byte) bool { 306 return bytes.Contains(val, f.pattern) 307 } 308 309 // negationFilter is a filter that matches the opposite of the provided filter. 310 type negationFilter struct { 311 filter Filter 312 } 313 314 func newNegationFilter(filter Filter) Filter { 315 return newImmutableFilter(&negationFilter{filter: filter}) 316 } 317 318 func (f *negationFilter) String() string { 319 return "Not(" + f.filter.String() + ")" 320 } 321 322 func (f *negationFilter) Matches(val []byte) bool { 323 return !f.filter.Matches(val) 324 } 325 326 // multiFilter chains multiple filters together with a logicalOp. 327 type multiFilter struct { 328 filters []Filter 329 op LogicalOp 330 } 331 332 // NewMultiFilter returns a filter that chains multiple filters together 333 // using a LogicalOp. 
334 func NewMultiFilter(filters []Filter, op LogicalOp) Filter { 335 return newImmutableFilter(&multiFilter{filters: filters, op: op}) 336 } 337 338 func (f *multiFilter) String() string { 339 separator := " " + string(f.op) + " " 340 var buf bytes.Buffer 341 numFilters := len(f.filters) 342 for i := 0; i < numFilters; i++ { 343 buf.WriteString(f.filters[i].String()) 344 if i < numFilters-1 { 345 buf.WriteString(separator) 346 } 347 } 348 return buf.String() 349 } 350 351 func (f *multiFilter) Matches(val []byte) bool { 352 if len(f.filters) == 0 { 353 return true 354 } 355 356 for _, filter := range f.filters { 357 match := filter.Matches(val) 358 if f.op == Conjunction && !match { 359 return false 360 } 361 362 if f.op == Disjunction && match { 363 return true 364 } 365 } 366 367 return f.op == Conjunction 368 } 369 370 // chainFilter matches an input string against certain conditions 371 // while returning the unmatched part of the input if there is a match. 372 type chainFilter interface { 373 fmt.Stringer 374 375 matches(val []byte) ([]byte, bool) 376 } 377 378 // equalityChainFilter is a filter that performs equality string matches 379 // from either the front or back of the string. 380 type equalityChainFilter struct { 381 pattern []byte 382 backwards bool 383 } 384 385 func newEqualityChainFilter(pattern []byte, backwards bool) chainFilter { 386 return &equalityChainFilter{pattern: pattern, backwards: backwards} 387 } 388 389 func (f *equalityChainFilter) String() string { 390 return "Equals(\"" + string(f.pattern) + "\")" 391 } 392 393 func (f *equalityChainFilter) matches(val []byte) ([]byte, bool) { 394 if f.backwards && bytes.HasSuffix(val, f.pattern) { 395 return val[:len(val)-len(f.pattern)], true 396 } 397 398 if !f.backwards && bytes.HasPrefix(val, f.pattern) { 399 return val[len(f.pattern):], true 400 } 401 402 return nil, false 403 } 404 405 // singleAnyCharFilter is a filter that allows any one char. 
406 type singleAnyCharFilter struct { 407 backwards bool 408 } 409 410 func newSingleAnyCharFilter(backwards bool) chainFilter { 411 if backwards { 412 return singleAnyCharFilterBackwards 413 } 414 415 return singleAnyCharFilterForwards 416 } 417 418 func (f *singleAnyCharFilter) String() string { return "AnyChar" } 419 420 func (f *singleAnyCharFilter) matches(val []byte) ([]byte, bool) { 421 if len(val) == 0 { 422 return nil, false 423 } 424 425 if f.backwards { 426 return val[:len(val)-1], true 427 } 428 429 return val[1:], true 430 } 431 432 // newSingleRangeFilter creates a filter that performs range matching 433 // on a single char. 434 func newSingleRangeFilter(pattern []byte, backwards bool) (chainFilter, error) { 435 if len(pattern) == 0 { 436 return nil, errInvalidFilterPattern 437 } 438 439 negate := false 440 if pattern[0] == negationChar { 441 negate = true 442 pattern = pattern[1:] 443 } 444 445 if len(pattern) > 1 && pattern[1] == rangeChar { 446 // If there is a '-' char at position 2, look for repeated instances 447 // of a-z. 448 if len(pattern)%3 != 0 { 449 return nil, errInvalidFilterPattern 450 } 451 452 patterns := make([][]byte, 0, len(pattern)%3) 453 for i := 0; i < len(pattern); i += 3 { 454 if pattern[i+1] != rangeChar || pattern[i] > pattern[i+2] { 455 return nil, errInvalidFilterPattern 456 } 457 458 patterns = append(patterns, pattern[i:i+3]) 459 } 460 461 return &singleRangeFilter{patterns: patterns, backwards: backwards, negate: negate}, nil 462 } 463 464 return &singleCharSetFilter{pattern: pattern, backwards: backwards, negate: negate}, nil 465 } 466 467 // singleRangeFilter is a filter that performs a single character match against 468 // a range of chars given in a range format eg. [a-z]. 
469 type singleRangeFilter struct { 470 patterns [][]byte 471 backwards bool 472 negate bool 473 } 474 475 func (f *singleRangeFilter) String() string { 476 var negatePrefix, negateSuffix string 477 if f.negate { 478 negatePrefix = "Not(" 479 negateSuffix = ")" 480 } 481 482 return negatePrefix + "Range(\"" + 483 string(bytes.Join(f.patterns, []byte(fmt.Sprintf(" %s ", Disjunction)))) + 484 "\")" + negateSuffix 485 } 486 487 func (f *singleRangeFilter) matches(val []byte) ([]byte, bool) { 488 if len(val) == 0 { 489 return nil, false 490 } 491 492 match := false 493 idx := 0 494 remainder := val[1:] 495 if f.backwards { 496 idx = len(val) - 1 497 remainder = val[:idx] 498 } 499 500 for _, pattern := range f.patterns { 501 if val[idx] >= pattern[0] && val[idx] <= pattern[2] { 502 match = true 503 break 504 } 505 } 506 507 if f.negate { 508 match = !match 509 } 510 511 return remainder, match 512 } 513 514 // singleCharSetFilter is a filter that performs a single character match against 515 // a set of chars given explicitly eg. [abcdefg]. 
516 type singleCharSetFilter struct { 517 pattern []byte 518 backwards bool 519 negate bool 520 } 521 522 func (f *singleCharSetFilter) String() string { 523 var negatePrefix, negateSuffix string 524 if f.negate { 525 negatePrefix = "Not(" 526 negateSuffix = ")" 527 } 528 529 return negatePrefix + "Range(\"" + string(f.pattern) + "\")" + negateSuffix 530 } 531 532 func (f *singleCharSetFilter) matches(val []byte) ([]byte, bool) { 533 if len(val) == 0 { 534 return nil, false 535 } 536 537 match := false 538 for i := 0; i < len(f.pattern); i++ { 539 if f.backwards && val[len(val)-1] == f.pattern[i] { 540 match = true 541 break 542 } 543 544 if !f.backwards && val[0] == f.pattern[i] { 545 match = true 546 break 547 } 548 } 549 550 if f.negate { 551 match = !match 552 } 553 554 if f.backwards { 555 return val[:len(val)-1], match 556 } 557 558 return val[1:], match 559 } 560 561 // multiCharRangeFilter is a filter that performs matches against multiple sets of chars 562 // eg. {abc,defg}. 563 type multiCharSequenceFilter struct { 564 patterns [][]byte 565 backwards bool 566 } 567 568 func newMultiCharSequenceFilter(patterns []byte, backwards bool) (chainFilter, error) { 569 if len(patterns) == 0 { 570 return nil, errInvalidFilterPattern 571 } 572 573 return &multiCharSequenceFilter{ 574 patterns: bytes.Split(patterns, multiRangeSplit), 575 backwards: backwards, 576 }, nil 577 } 578 579 func (f *multiCharSequenceFilter) String() string { 580 return "Range(\"" + string(bytes.Join(f.patterns, multiRangeSplit)) + "\")" 581 } 582 583 func (f *multiCharSequenceFilter) matches(val []byte) ([]byte, bool) { 584 if len(val) == 0 { 585 return nil, false 586 } 587 588 var matchIndex int 589 var bestPattern []byte 590 for _, pattern := range f.patterns { 591 if len(pattern) > len(val) { 592 continue 593 } 594 595 if f.backwards { 596 if bytes.HasSuffix(val, pattern) { 597 if len(pattern) > len(bestPattern) { 598 bestPattern = pattern 599 matchIndex = len(val) - len(pattern) 600 // 
No need to continue searching through remaining patterns if a complete match is found 601 if len(bestPattern) == len(val) { 602 break 603 } 604 } 605 } 606 } else { 607 if bytes.HasPrefix(val, pattern) { 608 if len(pattern) > len(bestPattern) { 609 bestPattern = pattern 610 matchIndex = len(pattern) 611 // No need to continue searching through remaining patterns if a complete match is found 612 if len(bestPattern) == len(val) { 613 break 614 } 615 } 616 } 617 } 618 } 619 620 if bestPattern != nil { 621 if f.backwards { 622 return val[:matchIndex], true 623 } 624 return val[matchIndex:], true 625 } 626 627 return nil, false 628 } 629 630 // multiChainFilter chains multiple chainFilters together with &&. 631 type multiChainFilter struct { 632 filters []chainFilter 633 seg chainSegment 634 backwards bool 635 } 636 637 // newMultiChainFilter creates a new multiChainFilter from given chainFilters. 638 func newMultiChainFilter(filters []chainFilter, seg chainSegment, backwards bool) Filter { 639 return newImmutableFilter(&multiChainFilter{filters: filters, seg: seg, backwards: backwards}) 640 } 641 642 func (f *multiChainFilter) String() string { 643 separator := " then " 644 var buf bytes.Buffer 645 switch f.seg { 646 case start: 647 buf.WriteString("StartsWith(") 648 case end: 649 buf.WriteString("EndsWith(") 650 } 651 652 numFilters := len(f.filters) 653 for i := 0; i < numFilters; i++ { 654 buf.WriteString(f.filters[i].String()) 655 if i < numFilters-1 { 656 buf.WriteString(separator) 657 } 658 } 659 660 switch f.seg { 661 case start, end: 662 buf.WriteString(")") 663 } 664 665 return buf.String() 666 } 667 668 func (f *multiChainFilter) Matches(val []byte) bool { 669 if len(f.filters) == 0 { 670 return true 671 } 672 673 var match bool 674 675 if f.backwards { 676 for i := len(f.filters) - 1; i >= 0; i-- { 677 val, match = f.filters[i].matches(val) 678 if !match { 679 return false 680 } 681 } 682 } else { 683 for i := 0; i < len(f.filters); i++ { 684 val, match = 
f.filters[i].matches(val) 685 if !match { 686 return false 687 } 688 } 689 } 690 691 if f.seg == middle && len(val) != 0 { 692 // chain was middle segment and some value was left over at end of chain. 693 return false 694 } 695 696 return true 697 } 698 699 type immutableFilter struct { 700 f filter 701 } 702 703 func newImmutableFilter(f filter) Filter { 704 return immutableFilter{f: f} 705 } 706 707 func (f immutableFilter) String() string { 708 return f.f.String() 709 } 710 711 func (f immutableFilter) Matches(val []byte) bool { 712 return f.f.Matches(val) 713 } 714 715 func (f immutableFilter) Clone() Filter { 716 return f 717 }