github.com/apache/arrow/go/v7@v7.0.1/parquet/file/row_group_writer_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package file_test
    18  
    19  import (
    20  	"bytes"
    21  	"testing"
    22  
    23  	"github.com/apache/arrow/go/v7/arrow/memory"
    24  	"github.com/apache/arrow/go/v7/parquet/file"
    25  	"github.com/apache/arrow/go/v7/parquet/internal/encoding"
    26  	"github.com/apache/arrow/go/v7/parquet/schema"
    27  	"github.com/apache/thrift/lib/go/thrift"
    28  	"github.com/stretchr/testify/assert"
    29  )
    30  
    31  func TestBufferedRowGroupNulls(t *testing.T) {
    32  	type SimpleSchema struct {
    33  		Col1 *int32
    34  		Col2 *float32
    35  		Col3 *float64
    36  		Col4 *int64
    37  	}
    38  
    39  	data := []SimpleSchema{
    40  		{thrift.Int32Ptr(5), thrift.Float32Ptr(10), thrift.Float64Ptr(20), thrift.Int64Ptr(8)},
    41  		{nil, thrift.Float32Ptr(10), thrift.Float64Ptr(20), thrift.Int64Ptr(8)},
    42  		{thrift.Int32Ptr(5), nil, thrift.Float64Ptr(20), thrift.Int64Ptr(8)},
    43  		{thrift.Int32Ptr(5), thrift.Float32Ptr(10), nil, thrift.Int64Ptr(8)},
    44  		{thrift.Int32Ptr(5), thrift.Float32Ptr(10), thrift.Float64Ptr(20), nil},
    45  		{thrift.Int32Ptr(5), thrift.Float32Ptr(10), thrift.Float64Ptr(20), thrift.Int64Ptr(8)},
    46  	}
    47  
    48  	sink := encoding.NewBufferWriter(0, memory.DefaultAllocator)
    49  	sc, err := schema.NewSchemaFromStruct(SimpleSchema{})
    50  	assert.NoError(t, err)
    51  
    52  	writer := file.NewParquetWriter(sink, sc.Root())
    53  	rgWriter := writer.AppendBufferedRowGroup()
    54  
    55  	for _, d := range data {
    56  		cw, _ := rgWriter.Column(0)
    57  		if d.Col1 != nil {
    58  			cw.(*file.Int32ColumnChunkWriter).WriteBatch([]int32{*d.Col1}, []int16{1}, nil)
    59  		} else {
    60  			cw.(*file.Int32ColumnChunkWriter).WriteBatch(nil, []int16{0}, nil)
    61  		}
    62  
    63  		cw, _ = rgWriter.Column(1)
    64  		if d.Col2 != nil {
    65  			cw.(*file.Float32ColumnChunkWriter).WriteBatch([]float32{*d.Col2}, []int16{1}, nil)
    66  		} else {
    67  			cw.(*file.Float32ColumnChunkWriter).WriteBatch(nil, []int16{0}, nil)
    68  		}
    69  
    70  		cw, _ = rgWriter.Column(2)
    71  		if d.Col3 != nil {
    72  			cw.(*file.Float64ColumnChunkWriter).WriteBatch([]float64{*d.Col3}, []int16{1}, nil)
    73  		} else {
    74  			cw.(*file.Float64ColumnChunkWriter).WriteBatch(nil, []int16{0}, nil)
    75  		}
    76  
    77  		cw, _ = rgWriter.Column(3)
    78  		if d.Col4 != nil {
    79  			cw.(*file.Int64ColumnChunkWriter).WriteBatch([]int64{*d.Col4}, []int16{1}, nil)
    80  		} else {
    81  			cw.(*file.Int64ColumnChunkWriter).WriteBatch(nil, []int16{0}, nil)
    82  		}
    83  	}
    84  
    85  	rgWriter.Close()
    86  	writer.Close()
    87  
    88  	buffer := sink.Finish()
    89  	defer buffer.Release()
    90  
    91  	reader, err := file.NewParquetReader(bytes.NewReader(buffer.Bytes()))
    92  	assert.NoError(t, err)
    93  
    94  	assert.EqualValues(t, 1, reader.NumRowGroups())
    95  	rgr := reader.RowGroup(0)
    96  	assert.EqualValues(t, len(data), rgr.NumRows())
    97  }