github.com/grafana/pyroscope@v1.18.0/pkg/parquet/row_writer_test.go (about)

     1  package parquet
     2  
     3  import (
     4  	"testing"
     5  
     6  	"github.com/parquet-go/parquet-go"
     7  	"github.com/stretchr/testify/require"
     8  )
     9  
    10  var _ RowWriterFlusher = (*TestRowGroupWriter)(nil)
    11  
    12  type TestRowGroupWriter struct {
    13  	RowGroups       [][]parquet.Row
    14  	currentRowGroup int
    15  }
    16  
    17  func (r *TestRowGroupWriter) WriteRows(rows []parquet.Row) (int, error) {
    18  	if len(r.RowGroups) <= r.currentRowGroup {
    19  		r.RowGroups = append(r.RowGroups, []parquet.Row{})
    20  	}
    21  	r.RowGroups[r.currentRowGroup] = append(r.RowGroups[r.currentRowGroup], rows...)
    22  	return len(rows), nil
    23  }
    24  
    25  func (r *TestRowGroupWriter) Flush() error {
    26  	r.currentRowGroup++
    27  	return nil
    28  }
    29  
    30  func TestCopyAsRowGroups(t *testing.T) {
    31  	for _, tc := range []struct {
    32  		name             string
    33  		rowGroupNumCount int
    34  		reader           parquet.RowReader
    35  		expected         [][]parquet.Row
    36  	}{
    37  		{
    38  			"empty",
    39  			1,
    40  			EmptyRowReader,
    41  			nil,
    42  		},
    43  		{
    44  			"one row",
    45  			1,
    46  			NewBatchReader([][]parquet.Row{
    47  				{{parquet.Int32Value(1)}},
    48  			}),
    49  			[][]parquet.Row{
    50  				{{parquet.Int32Value(1)}},
    51  			},
    52  		},
    53  		{
    54  			"one row per group",
    55  			1,
    56  			NewBatchReader([][]parquet.Row{
    57  				{{parquet.Int32Value(1)}},
    58  				{{parquet.Int32Value(2)}, {parquet.Int32Value(3)}},
    59  				{{parquet.Int32Value(4)}},
    60  			}),
    61  			[][]parquet.Row{
    62  				{{parquet.Int32Value(1)}},
    63  				{{parquet.Int32Value(2)}},
    64  				{{parquet.Int32Value(3)}},
    65  				{{parquet.Int32Value(4)}},
    66  			},
    67  		},
    68  		{
    69  			"two row per group",
    70  			2,
    71  			NewBatchReader([][]parquet.Row{
    72  				{{parquet.Int32Value(1)}},
    73  				{{parquet.Int32Value(2)}, {parquet.Int32Value(3)}},
    74  				{{parquet.Int32Value(4)}},
    75  			}),
    76  			[][]parquet.Row{
    77  				{{parquet.Int32Value(1)}, {parquet.Int32Value(2)}},
    78  				{{parquet.Int32Value(3)}, {parquet.Int32Value(4)}},
    79  			},
    80  		},
    81  		{
    82  			"two row per group not full",
    83  			2,
    84  			NewBatchReader([][]parquet.Row{
    85  				{{parquet.Int32Value(1)}},
    86  				{{parquet.Int32Value(2)}, {parquet.Int32Value(3)}, {parquet.Int32Value(4)}, {parquet.Int32Value(5)}},
    87  			}),
    88  			[][]parquet.Row{
    89  				{{parquet.Int32Value(1)}, {parquet.Int32Value(2)}},
    90  				{{parquet.Int32Value(3)}, {parquet.Int32Value(4)}},
    91  				{{parquet.Int32Value(5)}},
    92  			},
    93  		},
    94  		{
    95  			"more in the group than the reader can read",
    96  			10000,
    97  			NewBatchReader([][]parquet.Row{
    98  				{{parquet.Int32Value(1)}},
    99  				{{parquet.Int32Value(2)}, {parquet.Int32Value(3)}, {parquet.Int32Value(4)}, {parquet.Int32Value(5)}},
   100  			}),
   101  			[][]parquet.Row{
   102  				{
   103  					{parquet.Int32Value(1)},
   104  					{parquet.Int32Value(2)},
   105  					{parquet.Int32Value(3)},
   106  					{parquet.Int32Value(4)},
   107  					{parquet.Int32Value(5)},
   108  				},
   109  			},
   110  		},
   111  		{
   112  			"more in the reader",
   113  			10000,
   114  			NewBatchReader([][]parquet.Row{
   115  				generateRows(5000),
   116  				generateRows(3000),
   117  			}),
   118  			[][]parquet.Row{
   119  				append(generateRows(5000), generateRows(3000)...),
   120  			},
   121  		},
   122  	} {
   123  		tc := tc
   124  		t.Run(tc.name, func(t *testing.T) {
   125  			writer := &TestRowGroupWriter{}
   126  			total, rowGroupCount, err := CopyAsRowGroups(writer, tc.reader, tc.rowGroupNumCount)
   127  			require.NoError(t, err)
   128  			require.Equal(t, uint64(countRows(tc.expected)), total)
   129  			require.Equal(t, uint64(len(tc.expected)), rowGroupCount)
   130  			require.Equal(t, tc.expected, writer.RowGroups)
   131  		})
   132  	}
   133  }
   134  
   135  func countRows(rows [][]parquet.Row) int {
   136  	count := 0
   137  	for _, r := range rows {
   138  		count += len(r)
   139  	}
   140  	return count
   141  }
   142  
   143  func generateRows(count int) []parquet.Row {
   144  	rows := make([]parquet.Row, count)
   145  	for i := 0; i < count; i++ {
   146  		rows[i] = parquet.Row{parquet.Int32Value(int32(i))}
   147  	}
   148  	return rows
   149  }