github.com/grafana/pyroscope@v1.18.0/pkg/parquet/row_reader_test.go (about)

     1  package parquet
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"math"
     7  	"testing"
     8  
     9  	"github.com/parquet-go/parquet-go"
    10  	"github.com/stretchr/testify/require"
    11  )
    12  
// Compile-time check that *BatchReader satisfies parquet.RowReader.
var _ parquet.RowReader = (*BatchReader)(nil)

// BatchReader is a test helper that serves rows from a fixed, in-memory list
// of batches. Its ReadRows method never returns rows from more than one batch
// per call.
type BatchReader struct {
	// batches holds the rows that have not been handed out yet; the first
	// element is the batch currently being served.
	batches [][]parquet.Row
}
    18  
    19  func NewBatchReader(batches [][]parquet.Row) *BatchReader {
    20  	return &BatchReader{batches: batches}
    21  }
    22  
    23  func (br *BatchReader) ReadRows(rows []parquet.Row) (int, error) {
    24  	if len(br.batches) == 0 {
    25  		return 0, io.EOF
    26  	}
    27  	n := copy(rows, br.batches[0])
    28  	if n < len(br.batches[0]) {
    29  		br.batches[0] = br.batches[0][n:]
    30  		return n, nil
    31  	}
    32  	br.batches = br.batches[1:]
    33  	return n, nil
    34  }
    35  
    36  func TestBufferedRowReaderIterator(t *testing.T) {
    37  	testBatchSize := func(n int) func(t *testing.T) {
    38  		return func(t *testing.T) {
    39  			reader := NewBufferedRowReaderIterator(
    40  				NewBatchReader(
    41  					[][]parquet.Row{
    42  						{{parquet.Int32Value(1)}},
    43  						{{parquet.Int32Value(2)}, {parquet.Int32Value(3)}},
    44  						{{parquet.Int32Value(4)}},
    45  					}),
    46  				n)
    47  			require.True(t, reader.Next())
    48  			require.Equal(t, parquet.Int32Value(1), reader.At()[0])
    49  			require.True(t, reader.Next())
    50  			require.Equal(t, parquet.Int32Value(2), reader.At()[0])
    51  			require.True(t, reader.Next())
    52  			require.Equal(t, parquet.Int32Value(3), reader.At()[0])
    53  			require.True(t, reader.Next())
    54  			require.Equal(t, parquet.Int32Value(4), reader.At()[0])
    55  			require.False(t, reader.Next())
    56  		}
    57  	}
    58  	t.Run("batch of 1", testBatchSize(1))
    59  	t.Run("bigger batch", testBatchSize(100))
    60  	t.Run("equal batch", testBatchSize(2))
    61  }
    62  
    63  func TestNewMergeRowReader(t *testing.T) {
    64  	for _, batchSize := range []int{1, 2, 3, 4, 5, 6} {
    65  		bufferSize := batchSize
    66  		t.Run(fmt.Sprintf("%d", bufferSize), func(t *testing.T) {
    67  			for _, tc := range []struct {
    68  				name     string
    69  				readers  []parquet.RowReader
    70  				expected []parquet.Row
    71  			}{
    72  				{
    73  					"merge 1 readers",
    74  					[]parquet.RowReader{
    75  						NewBatchReader([][]parquet.Row{
    76  							{{parquet.Int32Value(1)}},
    77  							{{parquet.Int32Value(3)}},
    78  							{{parquet.Int32Value(5)}},
    79  						}),
    80  					},
    81  					[]parquet.Row{
    82  						{parquet.Int32Value(1)},
    83  						{parquet.Int32Value(3)},
    84  						{parquet.Int32Value(5)},
    85  					},
    86  				},
    87  				{
    88  					"merge 2 readers",
    89  					[]parquet.RowReader{
    90  						NewBatchReader([][]parquet.Row{
    91  							{{parquet.Int32Value(1)}},
    92  							{{parquet.Int32Value(3)}},
    93  							{{parquet.Int32Value(5)}},
    94  						}),
    95  						NewBatchReader([][]parquet.Row{
    96  							{{parquet.Int32Value(2)}},
    97  							{{parquet.Int32Value(4)}},
    98  							{{parquet.Int32Value(6)}},
    99  						}),
   100  					},
   101  					[]parquet.Row{
   102  						{parquet.Int32Value(1)},
   103  						{parquet.Int32Value(2)},
   104  						{parquet.Int32Value(3)},
   105  						{parquet.Int32Value(4)},
   106  						{parquet.Int32Value(5)},
   107  						{parquet.Int32Value(6)},
   108  					},
   109  				},
   110  				{
   111  					"merge 3 readers 1 value",
   112  					[]parquet.RowReader{
   113  						NewBatchReader([][]parquet.Row{
   114  							{{parquet.Int32Value(1)}},
   115  						}),
   116  						NewBatchReader([][]parquet.Row{
   117  							{{parquet.Int32Value(2)}},
   118  						}),
   119  						NewBatchReader([][]parquet.Row{
   120  							{{parquet.Int32Value(3)}},
   121  						}),
   122  					},
   123  					[]parquet.Row{
   124  						{parquet.Int32Value(1)},
   125  						{parquet.Int32Value(2)},
   126  						{parquet.Int32Value(3)},
   127  					},
   128  				},
   129  			} {
   130  				tc := tc
   131  				t.Run(tc.name, func(t *testing.T) {
   132  					reader := NewMergeRowReader(tc.readers, parquet.Row{parquet.Int32Value(math.MaxInt32)}, func(r1, r2 parquet.Row) bool {
   133  						return r1[0].Int32() < r2[0].Int32()
   134  					})
   135  
   136  					actual, err := ReadAllWithBufferSize(reader, bufferSize)
   137  					require.NoError(t, err)
   138  					require.Equal(t, tc.expected, actual)
   139  				})
   140  			}
   141  		})
   142  	}
   143  }
   144  
   145  func TestIteratorRowReader(t *testing.T) {
   146  	it := NewIteratorRowReader(
   147  		NewBufferedRowReaderIterator(NewBatchReader([][]parquet.Row{
   148  			{{parquet.Int32Value(1)}, {parquet.Int32Value(2)}, {parquet.Int32Value(3)}},
   149  			{{parquet.Int32Value(4)}, {parquet.Int32Value(5)}, {parquet.Int32Value(6)}},
   150  			{{parquet.Int32Value(7)}, {parquet.Int32Value(8)}, {parquet.Int32Value(9)}},
   151  		}), 4),
   152  	)
   153  	actual, err := ReadAllWithBufferSize(it, 3)
   154  	require.NoError(t, err)
   155  	require.Equal(t, []parquet.Row{
   156  		{parquet.Int32Value(1)},
   157  		{parquet.Int32Value(2)},
   158  		{parquet.Int32Value(3)},
   159  		{parquet.Int32Value(4)},
   160  		{parquet.Int32Value(5)},
   161  		{parquet.Int32Value(6)},
   162  		{parquet.Int32Value(7)},
   163  		{parquet.Int32Value(8)},
   164  		{parquet.Int32Value(9)},
   165  	}, actual)
   166  }
   167  
// SomeRow is the row type written to the generic parquet buffer in
// BenchmarkBufferedRowReader.
type SomeRow struct {
	// Col1 is the row's only column; the benchmark fills it with the row index.
	Col1 int
}
   171  
   172  func BenchmarkBufferedRowReader(b *testing.B) {
   173  	buff := parquet.NewGenericBuffer[SomeRow]()
   174  	for i := 0; i < 1000000; i++ {
   175  		_, err := buff.Write([]SomeRow{{Col1: (i)}})
   176  		if err != nil {
   177  			b.Fatal(err)
   178  		}
   179  	}
   180  	reader := NewBufferedRowReaderIterator(buff.Rows(), 100)
   181  	defer reader.Close()
   182  	b.ResetTimer()
   183  	for i := 0; i < b.N; i++ {
   184  		for reader.Next() {
   185  			_ = reader.At()
   186  		}
   187  	}
   188  }