github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/reader_go18_test.go (about) 1 //go:build go1.18 2 3 package parquet_test 4 5 import ( 6 "bytes" 7 "errors" 8 "fmt" 9 "io" 10 "math/rand" 11 "reflect" 12 "testing" 13 14 "github.com/vc42/parquet-go" 15 ) 16 17 func TestGenericReader(t *testing.T) { 18 testGenericReader[booleanColumn](t) 19 testGenericReader[int32Column](t) 20 testGenericReader[int64Column](t) 21 testGenericReader[int96Column](t) 22 testGenericReader[floatColumn](t) 23 testGenericReader[doubleColumn](t) 24 testGenericReader[byteArrayColumn](t) 25 testGenericReader[fixedLenByteArrayColumn](t) 26 testGenericReader[stringColumn](t) 27 testGenericReader[indexedStringColumn](t) 28 testGenericReader[uuidColumn](t) 29 testGenericReader[mapColumn](t) 30 testGenericReader[decimalColumn](t) 31 testGenericReader[addressBook](t) 32 testGenericReader[contact](t) 33 testGenericReader[listColumn2](t) 34 testGenericReader[listColumn1](t) 35 testGenericReader[listColumn0](t) 36 testGenericReader[nestedListColumn1](t) 37 testGenericReader[nestedListColumn](t) 38 testGenericReader[*contact](t) 39 testGenericReader[paddedBooleanColumn](t) 40 testGenericReader[optionalInt32Column](t) 41 testGenericReader[repeatedInt32Column](t) 42 } 43 44 func testGenericReader[Row any](t *testing.T) { 45 var model Row 46 t.Run(reflect.TypeOf(model).Name(), func(t *testing.T) { 47 err := quickCheck(func(rows []Row) bool { 48 if len(rows) == 0 { 49 return true // TODO: fix support for parquet files with zero rows 50 } 51 if err := testGenericReaderRows(rows); err != nil { 52 t.Error(err) 53 return false 54 } 55 return true 56 }) 57 if err != nil { 58 t.Error(err) 59 } 60 }) 61 } 62 63 func testGenericReaderRows[Row any](rows []Row) error { 64 setNullPointers(rows) 65 buffer := new(bytes.Buffer) 66 writer := parquet.NewGenericWriter[Row](buffer) 67 _, err := writer.Write(rows) 68 if err != nil { 69 return err 70 } 71 if err := writer.Close(); err != nil { 72 return err 73 } 74 reader := parquet.NewGenericReader[Row](bytes.NewReader(buffer.Bytes())) 75 result := make([]Row, len(rows)) 76 n, err := reader.Read(result) 77 if err != nil && !errors.Is(err, io.EOF) { 78 return err 79 } 80 if n < len(rows) { 81 return fmt.Errorf("not enough values were read: want=%d got=%d", len(rows), n) 82 } 83 if !reflect.DeepEqual(rows, result) { 84 return fmt.Errorf("rows mismatch:\nwant: %+v\ngot: %+v", rows, result) 85 } 86 return nil 87 } 88 89 func BenchmarkGenericReader(b *testing.B) { 90 benchmarkGenericReader[benchmarkRowType](b) 91 benchmarkGenericReader[booleanColumn](b) 92 benchmarkGenericReader[int32Column](b) 93 benchmarkGenericReader[int64Column](b) 94 benchmarkGenericReader[floatColumn](b) 95 benchmarkGenericReader[doubleColumn](b) 96 benchmarkGenericReader[byteArrayColumn](b) 97 benchmarkGenericReader[fixedLenByteArrayColumn](b) 98 benchmarkGenericReader[stringColumn](b) 99 benchmarkGenericReader[indexedStringColumn](b) 100 benchmarkGenericReader[uuidColumn](b) 101 benchmarkGenericReader[mapColumn](b) 102 benchmarkGenericReader[decimalColumn](b) 103 benchmarkGenericReader[contact](b) 104 benchmarkGenericReader[paddedBooleanColumn](b) 105 benchmarkGenericReader[optionalInt32Column](b) 106 } 107 108 func benchmarkGenericReader[Row generator[Row]](b *testing.B) { 109 var model Row 110 b.Run(reflect.TypeOf(model).Name(), func(b *testing.B) { 111 prng := rand.New(rand.NewSource(0)) 112 rows := make([]Row, benchmarkNumRows) 113 for i := range rows { 114 rows[i] = rows[i].generate(prng) 115 } 116 117 rowbuf := make([]Row, benchmarkRowsPerStep) 118 buffer := parquet.NewGenericBuffer[Row]() 119 buffer.Write(rows) 120 121 b.Run("go1.17", func(b *testing.B) { 122 reader := parquet.NewRowGroupReader(buffer) 123 benchmarkRowsPerSecond(b, func() int { 124 for i := range rowbuf { 125 if err := reader.Read(&rowbuf[i]); err != nil { 126 if err != io.EOF { 127 b.Fatal(err) 128 } else { 129 reader.Reset() 130 } 131 } 132 } 133 return len(rowbuf) 134 }) 135 }) 136 137 b.Run("go1.18", func(b *testing.B) { 138 reader := parquet.NewGenericRowGroupReader[Row](buffer) 139 benchmarkRowsPerSecond(b, func() int { 140 n, err := reader.Read(rowbuf) 141 if err != nil { 142 if err != io.EOF { 143 b.Fatal(err) 144 } else { 145 reader.Reset() 146 } 147 } 148 return n 149 }) 150 }) 151 }) 152 }