github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/buffer_go18_test.go (about) 1 //go:build go1.18 2 3 package parquet_test 4 5 import ( 6 "errors" 7 "fmt" 8 "io" 9 "math/rand" 10 "reflect" 11 "testing" 12 13 "github.com/vc42/parquet-go" 14 ) 15 16 func TestGenericBuffer(t *testing.T) { 17 testGenericBuffer[booleanColumn](t) 18 testGenericBuffer[int32Column](t) 19 testGenericBuffer[int64Column](t) 20 testGenericBuffer[int96Column](t) 21 testGenericBuffer[floatColumn](t) 22 testGenericBuffer[doubleColumn](t) 23 testGenericBuffer[byteArrayColumn](t) 24 testGenericBuffer[fixedLenByteArrayColumn](t) 25 testGenericBuffer[stringColumn](t) 26 testGenericBuffer[indexedStringColumn](t) 27 testGenericBuffer[uuidColumn](t) 28 testGenericBuffer[mapColumn](t) 29 testGenericBuffer[decimalColumn](t) 30 testGenericBuffer[addressBook](t) 31 testGenericBuffer[contact](t) 32 testGenericBuffer[listColumn2](t) 33 testGenericBuffer[listColumn1](t) 34 testGenericBuffer[listColumn0](t) 35 testGenericBuffer[nestedListColumn1](t) 36 testGenericBuffer[nestedListColumn](t) 37 testGenericBuffer[*contact](t) 38 testGenericBuffer[paddedBooleanColumn](t) 39 testGenericBuffer[optionalInt32Column](t) 40 testGenericBuffer[repeatedInt32Column](t) 41 } 42 43 func testGenericBuffer[Row any](t *testing.T) { 44 var model Row 45 t.Run(reflect.TypeOf(model).Name(), func(t *testing.T) { 46 err := quickCheck(func(rows []Row) bool { 47 if len(rows) == 0 { 48 return true // TODO: fix support for parquet files with zero rows 49 } 50 if err := testGenericBufferRows(rows); err != nil { 51 t.Error(err) 52 return false 53 } 54 return true 55 }) 56 if err != nil { 57 t.Error(err) 58 } 59 }) 60 } 61 62 func testGenericBufferRows[Row any](rows []Row) error { 63 setNullPointers(rows) 64 buffer := parquet.NewGenericBuffer[Row]() 65 _, err := buffer.Write(rows) 66 if err != nil { 67 return err 68 } 69 reader := parquet.NewGenericRowGroupReader[Row](buffer) 70 result := make([]Row, len(rows)) 71 n, err := reader.Read(result) 72 if err != nil && !errors.Is(err, io.EOF) { 73 return err 74 } 75 if n < len(rows) { 76 return fmt.Errorf("not enough values were read: want=%d got=%d", len(rows), n) 77 } 78 if !reflect.DeepEqual(rows, result) { 79 return fmt.Errorf("rows mismatch:\nwant: %#v\ngot: %#v", rows, result) 80 } 81 return nil 82 } 83 84 func setNullPointers[Row any](rows []Row) { 85 if len(rows) > 0 && reflect.TypeOf(rows[0]).Kind() == reflect.Pointer { 86 for i := range rows { 87 v := reflect.ValueOf(&rows[i]).Elem() 88 if v.IsNil() { 89 v.Set(reflect.New(v.Type().Elem())) 90 } 91 } 92 } 93 } 94 95 type generator[T any] interface { 96 generate(*rand.Rand) T 97 } 98 99 func BenchmarkGenericBuffer(b *testing.B) { 100 benchmarkGenericBuffer[benchmarkRowType](b) 101 benchmarkGenericBuffer[booleanColumn](b) 102 benchmarkGenericBuffer[int32Column](b) 103 benchmarkGenericBuffer[int64Column](b) 104 benchmarkGenericBuffer[floatColumn](b) 105 benchmarkGenericBuffer[doubleColumn](b) 106 benchmarkGenericBuffer[byteArrayColumn](b) 107 benchmarkGenericBuffer[fixedLenByteArrayColumn](b) 108 benchmarkGenericBuffer[stringColumn](b) 109 benchmarkGenericBuffer[indexedStringColumn](b) 110 benchmarkGenericBuffer[uuidColumn](b) 111 benchmarkGenericBuffer[mapColumn](b) 112 benchmarkGenericBuffer[decimalColumn](b) 113 benchmarkGenericBuffer[contact](b) 114 benchmarkGenericBuffer[paddedBooleanColumn](b) 115 benchmarkGenericBuffer[optionalInt32Column](b) 116 benchmarkGenericBuffer[repeatedInt32Column](b) 117 } 118 119 func benchmarkGenericBuffer[Row generator[Row]](b *testing.B) { 120 var model Row 121 b.Run(reflect.TypeOf(model).Name(), func(b *testing.B) { 122 prng := rand.New(rand.NewSource(0)) 123 rows := make([]Row, benchmarkNumRows) 124 for i := range rows { 125 rows[i] = rows[i].generate(prng) 126 } 127 128 b.Run("go1.17", func(b *testing.B) { 129 buffer := parquet.NewBuffer(parquet.SchemaOf(rows[0])) 130 i := 0 131 benchmarkRowsPerSecond(b, func() int { 132 for j := 0; j < benchmarkRowsPerStep; j++ { 133 if err := buffer.Write(&rows[i]); err != nil { 134 b.Fatal(err) 135 } 136 } 137 138 i += benchmarkRowsPerStep 139 i %= benchmarkNumRows 140 141 if i == 0 { 142 buffer.Reset() 143 } 144 return benchmarkRowsPerStep 145 }) 146 }) 147 148 b.Run("go1.18", func(b *testing.B) { 149 buffer := parquet.NewGenericBuffer[Row]() 150 i := 0 151 benchmarkRowsPerSecond(b, func() int { 152 n, err := buffer.Write(rows[i : i+benchmarkRowsPerStep]) 153 if err != nil { 154 b.Fatal(err) 155 } 156 157 i += benchmarkRowsPerStep 158 i %= benchmarkNumRows 159 160 if i == 0 { 161 buffer.Reset() 162 } 163 return n 164 }) 165 }) 166 }) 167 }