github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/query/predicate_test.go (about)

     1  package query
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"testing"
     7  
     8  	"github.com/google/uuid"
     9  	"github.com/parquet-go/parquet-go"
    10  	"github.com/stretchr/testify/require"
    11  )
    12  
    13  type dictString struct {
    14  	S string `parquet:",dict"`
    15  }
    16  
    17  type String struct {
    18  	S string `parquet:",dict"`
    19  }
    20  
    21  func TestSubstringPredicate(t *testing.T) {
    22  	// Normal case - all chunks/pages/values inspected
    23  	testPredicate(t, predicateTestCase[String]{
    24  		predicate:  NewSubstringPredicate("b"),
    25  		keptChunks: 1,
    26  		keptPages:  1,
    27  		keptValues: 2,
    28  		writeData: func(w *parquet.GenericWriter[String]) {
    29  			_, err := w.Write([]String{{"abc"}})
    30  			require.NoError(t, err) // kept
    31  			_, err = w.Write([]String{{"bcd"}})
    32  			require.NoError(t, err) // kept
    33  			_, err = w.Write([]String{{"cde"}})
    34  			require.NoError(t, err) // skipped
    35  		},
    36  	})
    37  
    38  	// Dictionary in the page header allows for skipping a page
    39  	testPredicate(t, predicateTestCase[dictString]{
    40  		predicate:  NewSubstringPredicate("x"), // Not present in any values
    41  		keptChunks: 1,
    42  		keptPages:  0,
    43  		keptValues: 0,
    44  		writeData: func(w *parquet.GenericWriter[dictString]) {
    45  			_, err := w.Write([]dictString{{"abc"}})
    46  			require.NoError(t, err)
    47  			_, err = w.Write([]dictString{{"bcd"}})
    48  			require.NoError(t, err)
    49  			_, err = w.Write([]dictString{{"cde"}})
    50  			require.NoError(t, err)
    51  		},
    52  	})
    53  }
    54  
// predicateTestCase describes one scenario for testPredicate, where P is the
// row type written to the parquet file:
//   - writeData populates the supplied writer with the test rows.
//   - keptChunks, keptPages and keptValues are the expected values of the
//     instrumented predicate's KeptColumnChunks, KeptPages and KeptValues
//     counters after the column has been iterated.
//   - predicate is the predicate under test; testPredicate wraps it in an
//     InstrumentedPredicate before iterating.
type predicateTestCase[P any] struct {
	writeData  func(w *parquet.GenericWriter[P])
	keptChunks int
	keptPages  int
	keptValues int
	predicate  Predicate
}
    62  
    63  // testPredicate by writing data and then iterating the column.  The data model
    64  // must contain a single column.
    65  func testPredicate[T any](t *testing.T, tc predicateTestCase[T]) {
    66  	buf := new(bytes.Buffer)
    67  	w := parquet.NewGenericWriter[T](buf)
    68  	tc.writeData(w)
    69  	w.Flush()
    70  	w.Close()
    71  
    72  	file := bytes.NewReader(buf.Bytes())
    73  	r, err := parquet.OpenFile(file, int64(buf.Len()))
    74  	require.NoError(t, err)
    75  
    76  	p := InstrumentedPredicate{pred: tc.predicate}
    77  
    78  	i := NewSyncIterator(context.TODO(), r.RowGroups(), 0, "test", 100, &p, "")
    79  	for i.Next() {
    80  	}
    81  
    82  	require.Equal(t, tc.keptChunks, int(p.KeptColumnChunks.Load()), "keptChunks")
    83  	require.Equal(t, tc.keptPages, int(p.KeptPages.Load()), "keptPages")
    84  	require.Equal(t, tc.keptValues, int(p.KeptValues.Load()), "keptValues")
    85  }
    86  
    87  func BenchmarkSubstringPredicate(b *testing.B) {
    88  	p := NewSubstringPredicate("abc")
    89  
    90  	s := make([]parquet.Value, 1000)
    91  	for i := 0; i < 1000; i++ {
    92  		s[i] = parquet.ValueOf(uuid.New().String())
    93  	}
    94  
    95  	b.ResetTimer()
    96  
    97  	for i := 0; i < b.N; i++ {
    98  		for _, ss := range s {
    99  			p.KeepValue(ss)
   100  		}
   101  	}
   102  }