github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/query/predicate_test.go (about) 1 package query 2 3 import ( 4 "bytes" 5 "context" 6 "testing" 7 8 "github.com/google/uuid" 9 "github.com/parquet-go/parquet-go" 10 "github.com/stretchr/testify/require" 11 ) 12 13 type dictString struct { 14 S string `parquet:",dict"` 15 } 16 17 type String struct { 18 S string `parquet:",dict"` 19 } 20 21 func TestSubstringPredicate(t *testing.T) { 22 // Normal case - all chunks/pages/values inspected 23 testPredicate(t, predicateTestCase[String]{ 24 predicate: NewSubstringPredicate("b"), 25 keptChunks: 1, 26 keptPages: 1, 27 keptValues: 2, 28 writeData: func(w *parquet.GenericWriter[String]) { 29 _, err := w.Write([]String{{"abc"}}) 30 require.NoError(t, err) // kept 31 _, err = w.Write([]String{{"bcd"}}) 32 require.NoError(t, err) // kept 33 _, err = w.Write([]String{{"cde"}}) 34 require.NoError(t, err) // skipped 35 }, 36 }) 37 38 // Dictionary in the page header allows for skipping a page 39 testPredicate(t, predicateTestCase[dictString]{ 40 predicate: NewSubstringPredicate("x"), // Not present in any values 41 keptChunks: 1, 42 keptPages: 0, 43 keptValues: 0, 44 writeData: func(w *parquet.GenericWriter[dictString]) { 45 _, err := w.Write([]dictString{{"abc"}}) 46 require.NoError(t, err) 47 _, err = w.Write([]dictString{{"bcd"}}) 48 require.NoError(t, err) 49 _, err = w.Write([]dictString{{"cde"}}) 50 require.NoError(t, err) 51 }, 52 }) 53 } 54 55 type predicateTestCase[P any] struct { 56 writeData func(w *parquet.GenericWriter[P]) 57 keptChunks int 58 keptPages int 59 keptValues int 60 predicate Predicate 61 } 62 63 // testPredicate by writing data and then iterating the column. The data model 64 // must contain a single column. 65 func testPredicate[T any](t *testing.T, tc predicateTestCase[T]) { 66 buf := new(bytes.Buffer) 67 w := parquet.NewGenericWriter[T](buf) 68 tc.writeData(w) 69 w.Flush() 70 w.Close() 71 72 file := bytes.NewReader(buf.Bytes()) 73 r, err := parquet.OpenFile(file, int64(buf.Len())) 74 require.NoError(t, err) 75 76 p := InstrumentedPredicate{pred: tc.predicate} 77 78 i := NewSyncIterator(context.TODO(), r.RowGroups(), 0, "test", 100, &p, "") 79 for i.Next() { 80 } 81 82 require.Equal(t, tc.keptChunks, int(p.KeptColumnChunks.Load()), "keptChunks") 83 require.Equal(t, tc.keptPages, int(p.KeptPages.Load()), "keptPages") 84 require.Equal(t, tc.keptValues, int(p.KeptValues.Load()), "keptValues") 85 } 86 87 func BenchmarkSubstringPredicate(b *testing.B) { 88 p := NewSubstringPredicate("abc") 89 90 s := make([]parquet.Value, 1000) 91 for i := 0; i < 1000; i++ { 92 s[i] = parquet.ValueOf(uuid.New().String()) 93 } 94 95 b.ResetTimer() 96 97 for i := 0; i < b.N; i++ { 98 for _, ss := range s { 99 p.KeepValue(ss) 100 } 101 } 102 }