github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/query/predicates.go (about)

     1  package query
     2  
     3  import (
     4  	"bytes"
     5  	"strings"
     6  
     7  	pq "github.com/parquet-go/parquet-go"
     8  	"go.uber.org/atomic"
     9  	"golang.org/x/exp/constraints"
    10  )
    11  
    12  // Predicate is a pushdown predicate that can be applied at
    13  // the chunk, page, and value levels.
    14  type Predicate interface {
    15  	KeepColumnChunk(ci pq.ColumnIndex) bool
    16  	KeepPage(page pq.Page) bool
    17  	KeepValue(pq.Value) bool
    18  }
    19  
    20  // StringInPredicate checks for any of the given strings.
    21  type StringInPredicate struct {
    22  	ss [][]byte
    23  }
    24  
    25  var _ Predicate = (*StringInPredicate)(nil)
    26  
    27  func NewStringInPredicate(ss []string) Predicate {
    28  	p := &StringInPredicate{
    29  		ss: make([][]byte, len(ss)),
    30  	}
    31  	for i := range ss {
    32  		p.ss[i] = []byte(ss[i])
    33  	}
    34  	return p
    35  }
    36  
    37  func (p *StringInPredicate) KeepColumnChunk(ci pq.ColumnIndex) bool {
    38  	if ci != nil {
    39  
    40  		for _, subs := range p.ss {
    41  			for i := 0; i < ci.NumPages(); i++ {
    42  				ok := bytes.Compare(ci.MinValue(i).ByteArray(), subs) <= 0 && bytes.Compare(ci.MaxValue(i).ByteArray(), subs) >= 0
    43  				if ok {
    44  					// At least one page in this chunk matches
    45  					return true
    46  				}
    47  			}
    48  		}
    49  		return false
    50  	}
    51  
    52  	return true
    53  }
    54  
    55  func (p *StringInPredicate) KeepValue(v pq.Value) bool {
    56  	ba := v.ByteArray()
    57  	for _, ss := range p.ss {
    58  		if bytes.Equal(ba, ss) {
    59  			return true
    60  		}
    61  	}
    62  	return false
    63  }
    64  
    65  func (p *StringInPredicate) KeepPage(page pq.Page) bool {
    66  	// todo: check bounds
    67  
    68  	// If a dictionary column then ensure at least one matching
    69  	// value exists in the dictionary
    70  	dict := page.Dictionary()
    71  	if dict != nil && dict.Len() > 0 {
    72  		len := dict.Len()
    73  
    74  		for i := 0; i < len; i++ {
    75  			dictionaryEntry := dict.Index(int32(i)).ByteArray()
    76  			for _, subs := range p.ss {
    77  				if bytes.Equal(dictionaryEntry, subs) {
    78  					// At least 1 string present in this page
    79  					return true
    80  				}
    81  			}
    82  		}
    83  
    84  		return false
    85  	}
    86  
    87  	return true
    88  }
    89  
    90  type SubstringPredicate struct {
    91  	substring string
    92  	matches   map[string]bool
    93  }
    94  
    95  var _ Predicate = (*SubstringPredicate)(nil)
    96  
    97  func NewSubstringPredicate(substring string) *SubstringPredicate {
    98  	return &SubstringPredicate{
    99  		substring: substring,
   100  		matches:   map[string]bool{},
   101  	}
   102  }
   103  
   104  func (p *SubstringPredicate) KeepColumnChunk(_ pq.ColumnIndex) bool {
   105  	// Reset match cache on each row group change
   106  	p.matches = make(map[string]bool, len(p.matches))
   107  
   108  	// Is there any filtering possible here?
   109  	// Column chunk contains a bloom filter and min/max bounds,
   110  	// but those can't be inspected for a substring match.
   111  	return true
   112  }
   113  
   114  func (p *SubstringPredicate) KeepValue(v pq.Value) bool {
   115  	vs := v.String()
   116  	if m, ok := p.matches[vs]; ok {
   117  		return m
   118  	}
   119  
   120  	m := strings.Contains(vs, p.substring)
   121  	p.matches[vs] = m
   122  	return m
   123  }
   124  
   125  func (p *SubstringPredicate) KeepPage(page pq.Page) bool {
   126  	// If a dictionary column then ensure at least one matching
   127  	// value exists in the dictionary
   128  	dict := page.Dictionary()
   129  	if dict != nil && dict.Len() > 0 {
   130  		len := dict.Len()
   131  		for i := 0; i < len; i++ {
   132  			if p.KeepValue(dict.Index(int32(i))) {
   133  				return true
   134  			}
   135  		}
   136  
   137  		return false
   138  	}
   139  
   140  	return true
   141  }
   142  
   143  // IntBetweenPredicate checks for int between the bounds [min,max] inclusive
   144  type IntBetweenPredicate struct {
   145  	min, max int64
   146  }
   147  
   148  var _ Predicate = (*IntBetweenPredicate)(nil)
   149  
   150  func NewIntBetweenPredicate(min, max int64) *IntBetweenPredicate {
   151  	return &IntBetweenPredicate{min, max}
   152  }
   153  
   154  func (p *IntBetweenPredicate) KeepColumnChunk(ci pq.ColumnIndex) bool {
   155  	if ci != nil {
   156  		for i := 0; i < ci.NumPages(); i++ {
   157  			min := ci.MinValue(i).Int64()
   158  			max := ci.MaxValue(i).Int64()
   159  			if p.max >= min && p.min <= max {
   160  				return true
   161  			}
   162  		}
   163  		return false
   164  	}
   165  
   166  	return true
   167  }
   168  
   169  func (p *IntBetweenPredicate) KeepValue(v pq.Value) bool {
   170  	vv := v.Int64()
   171  	return p.min <= vv && vv <= p.max
   172  }
   173  
   174  func (p *IntBetweenPredicate) KeepPage(page pq.Page) bool {
   175  	if min, max, ok := page.Bounds(); ok {
   176  		return p.max >= min.Int64() && p.min <= max.Int64()
   177  	}
   178  	return true
   179  }
   180  
   181  type EqualInt64Predicate int64
   182  
   183  func NewEqualInt64Predicate(value int64) EqualInt64Predicate {
   184  	return EqualInt64Predicate(value)
   185  }
   186  
   187  func (p EqualInt64Predicate) KeepColumnChunk(ci pq.ColumnIndex) bool {
   188  	if ci != nil {
   189  		for i := 0; i < ci.NumPages(); i++ {
   190  			min := ci.MinValue(i).Int64()
   191  			max := ci.MaxValue(i).Int64()
   192  			if int64(p) >= min && int64(p) <= max {
   193  				return true
   194  			}
   195  		}
   196  		return false
   197  	}
   198  
   199  	return true
   200  }
   201  
   202  func (p EqualInt64Predicate) KeepValue(v pq.Value) bool {
   203  	vv := v.Int64()
   204  	return int64(p) <= vv && vv <= int64(p)
   205  }
   206  
   207  func (p EqualInt64Predicate) KeepPage(page pq.Page) bool {
   208  	if min, max, ok := page.Bounds(); ok {
   209  		return int64(p) >= min.Int64() && int64(p) <= max.Int64()
   210  	}
   211  	return true
   212  }
   213  
   214  type InstrumentedPredicate struct {
   215  	pred                  Predicate // Optional, if missing then just keeps metrics with no filtering
   216  	InspectedColumnChunks atomic.Int64
   217  	InspectedPages        atomic.Int64
   218  	InspectedValues       atomic.Int64
   219  	KeptColumnChunks      atomic.Int64
   220  	KeptPages             atomic.Int64
   221  	KeptValues            atomic.Int64
   222  }
   223  
   224  var _ Predicate = (*InstrumentedPredicate)(nil)
   225  
   226  func (p *InstrumentedPredicate) KeepColumnChunk(ci pq.ColumnIndex) bool {
   227  	p.InspectedColumnChunks.Inc()
   228  
   229  	if p.pred == nil || p.pred.KeepColumnChunk(ci) {
   230  		p.KeptColumnChunks.Inc()
   231  		return true
   232  	}
   233  
   234  	return false
   235  }
   236  
   237  func (p *InstrumentedPredicate) KeepPage(page pq.Page) bool {
   238  	p.InspectedPages.Inc()
   239  
   240  	if p.pred == nil || p.pred.KeepPage(page) {
   241  		p.KeptPages.Inc()
   242  		return true
   243  	}
   244  
   245  	return false
   246  }
   247  
   248  func (p *InstrumentedPredicate) KeepValue(v pq.Value) bool {
   249  	p.InspectedValues.Inc()
   250  
   251  	if p.pred == nil || p.pred.KeepValue(v) {
   252  		p.KeptValues.Inc()
   253  		return true
   254  	}
   255  
   256  	return false
   257  }
   258  
   259  type mapPredicate[K constraints.Integer, V any] struct {
   260  	inbetweenPred Predicate
   261  	m             map[K]V
   262  }
   263  
   264  func NewMapPredicate[K constraints.Integer, V any](m map[K]V) Predicate {
   265  
   266  	var min, max int64
   267  
   268  	first := true
   269  	for k := range m {
   270  		if first || max < int64(k) {
   271  			max = int64(k)
   272  		}
   273  		if first || min > int64(k) {
   274  			min = int64(k)
   275  		}
   276  		first = false
   277  	}
   278  
   279  	return &mapPredicate[K, V]{
   280  		inbetweenPred: NewIntBetweenPredicate(min, max),
   281  		m:             m,
   282  	}
   283  }
   284  
   285  func (m *mapPredicate[K, V]) KeepColumnChunk(ci pq.ColumnIndex) bool {
   286  	return m.inbetweenPred.KeepColumnChunk(ci)
   287  }
   288  
   289  func (m *mapPredicate[K, V]) KeepPage(page pq.Page) bool {
   290  	return m.inbetweenPred.KeepPage(page)
   291  }
   292  
   293  func (m *mapPredicate[K, V]) KeepValue(v pq.Value) bool {
   294  	_, exists := m.m[K(v.Int64())]
   295  	return exists
   296  }