github.com/apache/arrow/go/v14@v14.0.2/parquet/file/row_group_writer_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package file_test 18 19 import ( 20 "bytes" 21 "testing" 22 23 "github.com/apache/arrow/go/v14/arrow/memory" 24 "github.com/apache/arrow/go/v14/parquet/file" 25 "github.com/apache/arrow/go/v14/parquet/internal/encoding" 26 "github.com/apache/arrow/go/v14/parquet/schema" 27 "github.com/apache/thrift/lib/go/thrift" 28 "github.com/stretchr/testify/assert" 29 ) 30 31 func TestBufferedRowGroupNulls(t *testing.T) { 32 type SimpleSchema struct { 33 Col1 *int32 34 Col2 *float32 35 Col3 *float64 36 Col4 *int64 37 } 38 39 data := []SimpleSchema{ 40 {thrift.Int32Ptr(5), thrift.Float32Ptr(10), thrift.Float64Ptr(20), thrift.Int64Ptr(8)}, 41 {nil, thrift.Float32Ptr(10), thrift.Float64Ptr(20), thrift.Int64Ptr(8)}, 42 {thrift.Int32Ptr(5), nil, thrift.Float64Ptr(20), thrift.Int64Ptr(8)}, 43 {thrift.Int32Ptr(5), thrift.Float32Ptr(10), nil, thrift.Int64Ptr(8)}, 44 {thrift.Int32Ptr(5), thrift.Float32Ptr(10), thrift.Float64Ptr(20), nil}, 45 {thrift.Int32Ptr(5), thrift.Float32Ptr(10), thrift.Float64Ptr(20), thrift.Int64Ptr(8)}, 46 } 47 48 sink := encoding.NewBufferWriter(0, memory.DefaultAllocator) 49 sc, err := schema.NewSchemaFromStruct(SimpleSchema{}) 50 assert.NoError(t, err) 51 52 writer := file.NewParquetWriter(sink, sc.Root()) 53 rgWriter := writer.AppendBufferedRowGroup() 54 55 for _, d := range data { 56 cw, _ := rgWriter.Column(0) 57 if d.Col1 != nil { 58 cw.(*file.Int32ColumnChunkWriter).WriteBatch([]int32{*d.Col1}, []int16{1}, nil) 59 } else { 60 cw.(*file.Int32ColumnChunkWriter).WriteBatch(nil, []int16{0}, nil) 61 } 62 63 cw, _ = rgWriter.Column(1) 64 if d.Col2 != nil { 65 cw.(*file.Float32ColumnChunkWriter).WriteBatch([]float32{*d.Col2}, []int16{1}, nil) 66 } else { 67 cw.(*file.Float32ColumnChunkWriter).WriteBatch(nil, []int16{0}, nil) 68 } 69 70 cw, _ = rgWriter.Column(2) 71 if d.Col3 != nil { 72 cw.(*file.Float64ColumnChunkWriter).WriteBatch([]float64{*d.Col3}, []int16{1}, nil) 73 } else { 74 cw.(*file.Float64ColumnChunkWriter).WriteBatch(nil, []int16{0}, nil) 75 } 76 77 cw, _ = rgWriter.Column(3) 78 if d.Col4 != nil { 79 cw.(*file.Int64ColumnChunkWriter).WriteBatch([]int64{*d.Col4}, []int16{1}, nil) 80 } else { 81 cw.(*file.Int64ColumnChunkWriter).WriteBatch(nil, []int16{0}, nil) 82 } 83 } 84 85 rgWriter.Close() 86 writer.Close() 87 88 buffer := sink.Finish() 89 defer buffer.Release() 90 91 reader, err := file.NewParquetReader(bytes.NewReader(buffer.Bytes())) 92 assert.NoError(t, err) 93 94 assert.EqualValues(t, 1, reader.NumRowGroups()) 95 rgr := reader.RowGroup(0) 96 assert.EqualValues(t, len(data), rgr.NumRows()) 97 }