github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/query/predicates.go (about) 1 package query 2 3 import ( 4 "bytes" 5 "strings" 6 7 pq "github.com/parquet-go/parquet-go" 8 "go.uber.org/atomic" 9 "golang.org/x/exp/constraints" 10 ) 11 12 // Predicate is a pushdown predicate that can be applied at 13 // the chunk, page, and value levels. 14 type Predicate interface { 15 KeepColumnChunk(ci pq.ColumnIndex) bool 16 KeepPage(page pq.Page) bool 17 KeepValue(pq.Value) bool 18 } 19 20 // StringInPredicate checks for any of the given strings. 21 type StringInPredicate struct { 22 ss [][]byte 23 } 24 25 var _ Predicate = (*StringInPredicate)(nil) 26 27 func NewStringInPredicate(ss []string) Predicate { 28 p := &StringInPredicate{ 29 ss: make([][]byte, len(ss)), 30 } 31 for i := range ss { 32 p.ss[i] = []byte(ss[i]) 33 } 34 return p 35 } 36 37 func (p *StringInPredicate) KeepColumnChunk(ci pq.ColumnIndex) bool { 38 if ci != nil { 39 40 for _, subs := range p.ss { 41 for i := 0; i < ci.NumPages(); i++ { 42 ok := bytes.Compare(ci.MinValue(i).ByteArray(), subs) <= 0 && bytes.Compare(ci.MaxValue(i).ByteArray(), subs) >= 0 43 if ok { 44 // At least one page in this chunk matches 45 return true 46 } 47 } 48 } 49 return false 50 } 51 52 return true 53 } 54 55 func (p *StringInPredicate) KeepValue(v pq.Value) bool { 56 ba := v.ByteArray() 57 for _, ss := range p.ss { 58 if bytes.Equal(ba, ss) { 59 return true 60 } 61 } 62 return false 63 } 64 65 func (p *StringInPredicate) KeepPage(page pq.Page) bool { 66 // todo: check bounds 67 68 // If a dictionary column then ensure at least one matching 69 // value exists in the dictionary 70 dict := page.Dictionary() 71 if dict != nil && dict.Len() > 0 { 72 len := dict.Len() 73 74 for i := 0; i < len; i++ { 75 dictionaryEntry := dict.Index(int32(i)).ByteArray() 76 for _, subs := range p.ss { 77 if bytes.Equal(dictionaryEntry, subs) { 78 // At least 1 string present in this page 79 return true 80 } 81 } 82 } 83 84 return false 85 } 86 87 return true 88 } 89 90 type SubstringPredicate struct { 91 substring string 92 matches map[string]bool 93 } 94 95 var _ Predicate = (*SubstringPredicate)(nil) 96 97 func NewSubstringPredicate(substring string) *SubstringPredicate { 98 return &SubstringPredicate{ 99 substring: substring, 100 matches: map[string]bool{}, 101 } 102 } 103 104 func (p *SubstringPredicate) KeepColumnChunk(_ pq.ColumnIndex) bool { 105 // Reset match cache on each row group change 106 p.matches = make(map[string]bool, len(p.matches)) 107 108 // Is there any filtering possible here? 109 // Column chunk contains a bloom filter and min/max bounds, 110 // but those can't be inspected for a substring match. 111 return true 112 } 113 114 func (p *SubstringPredicate) KeepValue(v pq.Value) bool { 115 vs := v.String() 116 if m, ok := p.matches[vs]; ok { 117 return m 118 } 119 120 m := strings.Contains(vs, p.substring) 121 p.matches[vs] = m 122 return m 123 } 124 125 func (p *SubstringPredicate) KeepPage(page pq.Page) bool { 126 // If a dictionary column then ensure at least one matching 127 // value exists in the dictionary 128 dict := page.Dictionary() 129 if dict != nil && dict.Len() > 0 { 130 len := dict.Len() 131 for i := 0; i < len; i++ { 132 if p.KeepValue(dict.Index(int32(i))) { 133 return true 134 } 135 } 136 137 return false 138 } 139 140 return true 141 } 142 143 // IntBetweenPredicate checks for int between the bounds [min,max] inclusive 144 type IntBetweenPredicate struct { 145 min, max int64 146 } 147 148 var _ Predicate = (*IntBetweenPredicate)(nil) 149 150 func NewIntBetweenPredicate(min, max int64) *IntBetweenPredicate { 151 return &IntBetweenPredicate{min, max} 152 } 153 154 func (p *IntBetweenPredicate) KeepColumnChunk(ci pq.ColumnIndex) bool { 155 if ci != nil { 156 for i := 0; i < ci.NumPages(); i++ { 157 min := ci.MinValue(i).Int64() 158 max := ci.MaxValue(i).Int64() 159 if p.max >= min && p.min <= max { 160 return true 161 } 162 } 163 return false 164 } 165 166 return true 167 } 168 169 func (p *IntBetweenPredicate) KeepValue(v pq.Value) bool { 170 vv := v.Int64() 171 return p.min <= vv && vv <= p.max 172 } 173 174 func (p *IntBetweenPredicate) KeepPage(page pq.Page) bool { 175 if min, max, ok := page.Bounds(); ok { 176 return p.max >= min.Int64() && p.min <= max.Int64() 177 } 178 return true 179 } 180 181 type EqualInt64Predicate int64 182 183 func NewEqualInt64Predicate(value int64) EqualInt64Predicate { 184 return EqualInt64Predicate(value) 185 } 186 187 func (p EqualInt64Predicate) KeepColumnChunk(ci pq.ColumnIndex) bool { 188 if ci != nil { 189 for i := 0; i < ci.NumPages(); i++ { 190 min := ci.MinValue(i).Int64() 191 max := ci.MaxValue(i).Int64() 192 if int64(p) >= min && int64(p) <= max { 193 return true 194 } 195 } 196 return false 197 } 198 199 return true 200 } 201 202 func (p EqualInt64Predicate) KeepValue(v pq.Value) bool { 203 vv := v.Int64() 204 return int64(p) <= vv && vv <= int64(p) 205 } 206 207 func (p EqualInt64Predicate) KeepPage(page pq.Page) bool { 208 if min, max, ok := page.Bounds(); ok { 209 return int64(p) >= min.Int64() && int64(p) <= max.Int64() 210 } 211 return true 212 } 213 214 type InstrumentedPredicate struct { 215 pred Predicate // Optional, if missing then just keeps metrics with no filtering 216 InspectedColumnChunks atomic.Int64 217 InspectedPages atomic.Int64 218 InspectedValues atomic.Int64 219 KeptColumnChunks atomic.Int64 220 KeptPages atomic.Int64 221 KeptValues atomic.Int64 222 } 223 224 var _ Predicate = (*InstrumentedPredicate)(nil) 225 226 func (p *InstrumentedPredicate) KeepColumnChunk(ci pq.ColumnIndex) bool { 227 p.InspectedColumnChunks.Inc() 228 229 if p.pred == nil || p.pred.KeepColumnChunk(ci) { 230 p.KeptColumnChunks.Inc() 231 return true 232 } 233 234 return false 235 } 236 237 func (p *InstrumentedPredicate) KeepPage(page pq.Page) bool { 238 p.InspectedPages.Inc() 239 240 if p.pred == nil || p.pred.KeepPage(page) { 241 p.KeptPages.Inc() 242 return true 243 } 244 245 return false 246 } 247 248 func (p *InstrumentedPredicate) KeepValue(v pq.Value) bool { 249 p.InspectedValues.Inc() 250 251 if p.pred == nil || p.pred.KeepValue(v) { 252 p.KeptValues.Inc() 253 return true 254 } 255 256 return false 257 } 258 259 type mapPredicate[K constraints.Integer, V any] struct { 260 inbetweenPred Predicate 261 m map[K]V 262 } 263 264 func NewMapPredicate[K constraints.Integer, V any](m map[K]V) Predicate { 265 266 var min, max int64 267 268 first := true 269 for k := range m { 270 if first || max < int64(k) { 271 max = int64(k) 272 } 273 if first || min > int64(k) { 274 min = int64(k) 275 } 276 first = false 277 } 278 279 return &mapPredicate[K, V]{ 280 inbetweenPred: NewIntBetweenPredicate(min, max), 281 m: m, 282 } 283 } 284 285 func (m *mapPredicate[K, V]) KeepColumnChunk(ci pq.ColumnIndex) bool { 286 return m.inbetweenPred.KeepColumnChunk(ci) 287 } 288 289 func (m *mapPredicate[K, V]) KeepPage(page pq.Page) bool { 290 return m.inbetweenPred.KeepPage(page) 291 } 292 293 func (m *mapPredicate[K, V]) KeepValue(v pq.Value) bool { 294 _, exists := m.m[K(v.Int64())] 295 return exists 296 }