github.com/grafana/pyroscope@v1.18.0/pkg/parquet/row_writer_test.go (about) 1 package parquet 2 3 import ( 4 "testing" 5 6 "github.com/parquet-go/parquet-go" 7 "github.com/stretchr/testify/require" 8 ) 9 10 var _ RowWriterFlusher = (*TestRowGroupWriter)(nil) 11 12 type TestRowGroupWriter struct { 13 RowGroups [][]parquet.Row 14 currentRowGroup int 15 } 16 17 func (r *TestRowGroupWriter) WriteRows(rows []parquet.Row) (int, error) { 18 if len(r.RowGroups) <= r.currentRowGroup { 19 r.RowGroups = append(r.RowGroups, []parquet.Row{}) 20 } 21 r.RowGroups[r.currentRowGroup] = append(r.RowGroups[r.currentRowGroup], rows...) 22 return len(rows), nil 23 } 24 25 func (r *TestRowGroupWriter) Flush() error { 26 r.currentRowGroup++ 27 return nil 28 } 29 30 func TestCopyAsRowGroups(t *testing.T) { 31 for _, tc := range []struct { 32 name string 33 rowGroupNumCount int 34 reader parquet.RowReader 35 expected [][]parquet.Row 36 }{ 37 { 38 "empty", 39 1, 40 EmptyRowReader, 41 nil, 42 }, 43 { 44 "one row", 45 1, 46 NewBatchReader([][]parquet.Row{ 47 {{parquet.Int32Value(1)}}, 48 }), 49 [][]parquet.Row{ 50 {{parquet.Int32Value(1)}}, 51 }, 52 }, 53 { 54 "one row per group", 55 1, 56 NewBatchReader([][]parquet.Row{ 57 {{parquet.Int32Value(1)}}, 58 {{parquet.Int32Value(2)}, {parquet.Int32Value(3)}}, 59 {{parquet.Int32Value(4)}}, 60 }), 61 [][]parquet.Row{ 62 {{parquet.Int32Value(1)}}, 63 {{parquet.Int32Value(2)}}, 64 {{parquet.Int32Value(3)}}, 65 {{parquet.Int32Value(4)}}, 66 }, 67 }, 68 { 69 "two row per group", 70 2, 71 NewBatchReader([][]parquet.Row{ 72 {{parquet.Int32Value(1)}}, 73 {{parquet.Int32Value(2)}, {parquet.Int32Value(3)}}, 74 {{parquet.Int32Value(4)}}, 75 }), 76 [][]parquet.Row{ 77 {{parquet.Int32Value(1)}, {parquet.Int32Value(2)}}, 78 {{parquet.Int32Value(3)}, {parquet.Int32Value(4)}}, 79 }, 80 }, 81 { 82 "two row per group not full", 83 2, 84 NewBatchReader([][]parquet.Row{ 85 {{parquet.Int32Value(1)}}, 86 {{parquet.Int32Value(2)}, {parquet.Int32Value(3)}, {parquet.Int32Value(4)}, {parquet.Int32Value(5)}}, 87 }), 88 [][]parquet.Row{ 89 {{parquet.Int32Value(1)}, {parquet.Int32Value(2)}}, 90 {{parquet.Int32Value(3)}, {parquet.Int32Value(4)}}, 91 {{parquet.Int32Value(5)}}, 92 }, 93 }, 94 { 95 "more in the group than the reader can read", 96 10000, 97 NewBatchReader([][]parquet.Row{ 98 {{parquet.Int32Value(1)}}, 99 {{parquet.Int32Value(2)}, {parquet.Int32Value(3)}, {parquet.Int32Value(4)}, {parquet.Int32Value(5)}}, 100 }), 101 [][]parquet.Row{ 102 { 103 {parquet.Int32Value(1)}, 104 {parquet.Int32Value(2)}, 105 {parquet.Int32Value(3)}, 106 {parquet.Int32Value(4)}, 107 {parquet.Int32Value(5)}, 108 }, 109 }, 110 }, 111 { 112 "more in the reader", 113 10000, 114 NewBatchReader([][]parquet.Row{ 115 generateRows(5000), 116 generateRows(3000), 117 }), 118 [][]parquet.Row{ 119 append(generateRows(5000), generateRows(3000)...), 120 }, 121 }, 122 } { 123 tc := tc 124 t.Run(tc.name, func(t *testing.T) { 125 writer := &TestRowGroupWriter{} 126 total, rowGroupCount, err := CopyAsRowGroups(writer, tc.reader, tc.rowGroupNumCount) 127 require.NoError(t, err) 128 require.Equal(t, uint64(countRows(tc.expected)), total) 129 require.Equal(t, uint64(len(tc.expected)), rowGroupCount) 130 require.Equal(t, tc.expected, writer.RowGroups) 131 }) 132 } 133 } 134 135 func countRows(rows [][]parquet.Row) int { 136 count := 0 137 for _, r := range rows { 138 count += len(r) 139 } 140 return count 141 } 142 143 func generateRows(count int) []parquet.Row { 144 rows := make([]parquet.Row, count) 145 for i := 0; i < count; i++ { 146 rows[i] = parquet.Row{parquet.Int32Value(int32(i))} 147 } 148 return rows 149 }