github.com/apache/arrow/go/v14@v14.0.1/parquet/pqarrow/encode_arrow_test.go

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package pqarrow_test
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"fmt"
    23  	"math"
    24  	"strconv"
    25  	"strings"
    26  	"testing"
    27  
    28  	"github.com/apache/arrow/go/v14/arrow"
    29  	"github.com/apache/arrow/go/v14/arrow/array"
    30  	"github.com/apache/arrow/go/v14/arrow/bitutil"
    31  	"github.com/apache/arrow/go/v14/arrow/decimal128"
    32  	"github.com/apache/arrow/go/v14/arrow/decimal256"
    33  	"github.com/apache/arrow/go/v14/arrow/ipc"
    34  	"github.com/apache/arrow/go/v14/arrow/memory"
    35  	"github.com/apache/arrow/go/v14/internal/types"
    36  	"github.com/apache/arrow/go/v14/internal/utils"
    37  	"github.com/apache/arrow/go/v14/parquet"
    38  	"github.com/apache/arrow/go/v14/parquet/compress"
    39  	"github.com/apache/arrow/go/v14/parquet/file"
    40  	"github.com/apache/arrow/go/v14/parquet/internal/encoding"
    41  	"github.com/apache/arrow/go/v14/parquet/internal/testutils"
    42  	"github.com/apache/arrow/go/v14/parquet/pqarrow"
    43  	"github.com/apache/arrow/go/v14/parquet/schema"
    44  	"github.com/google/uuid"
    45  	"github.com/stretchr/testify/assert"
    46  	"github.com/stretchr/testify/require"
    47  	"github.com/stretchr/testify/suite"
    48  )
    49  
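         // makeSimpleTable wraps the provided chunked values in a one-column table named "col" with the given
         // nullability, attaching a PARQUET:field_id metadata entry to the field.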
    50  func makeSimpleTable(values *arrow.Chunked, nullable bool) arrow.Table {
    51  	sc := arrow.NewSchema([]arrow.Field{{Name: "col", Type: values.DataType(), Nullable: nullable,
    52  		Metadata: arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{"-1"})}}, nil)
    53  	column := arrow.NewColumn(sc.Field(0), values)
    54  	defer column.Release()
    55  	return array.NewTable(sc, []arrow.Column{*column}, -1)
    56  }
    57  
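         // makeDateTimeTypesTable builds a six-row table of date/time columns: date32, timestamps in ms/us/ns,
         // time32(ms), and time64(us/ns). With expected=true the nanosecond timestamp field f3 is swapped for its
         // microsecond equivalent, matching the coercion expected after writing with the writer settings used in
         // these tests; addFieldMeta optionally attaches PARQUET:field_id metadata to every field.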
    58  func makeDateTimeTypesTable(mem memory.Allocator, expected bool, addFieldMeta bool) arrow.Table {
    59  	isValid := []bool{true, true, true, false, true, true}
    60  
    61  	// roundtrip without modification
    62  	f0 := arrow.Field{Name: "f0", Type: arrow.FixedWidthTypes.Date32, Nullable: true}
    63  	f1 := arrow.Field{Name: "f1", Type: arrow.FixedWidthTypes.Timestamp_ms, Nullable: true}
    64  	f2 := arrow.Field{Name: "f2", Type: arrow.FixedWidthTypes.Timestamp_us, Nullable: true}
    65  	f3 := arrow.Field{Name: "f3", Type: arrow.FixedWidthTypes.Timestamp_ns, Nullable: true}
    66  	f3X := arrow.Field{Name: "f3", Type: arrow.FixedWidthTypes.Timestamp_us, Nullable: true}
    67  	f4 := arrow.Field{Name: "f4", Type: arrow.FixedWidthTypes.Time32ms, Nullable: true}
    68  	f5 := arrow.Field{Name: "f5", Type: arrow.FixedWidthTypes.Time64us, Nullable: true}
    69  	f6 := arrow.Field{Name: "f6", Type: arrow.FixedWidthTypes.Time64ns, Nullable: true}
    70  
    71  	fieldList := []arrow.Field{f0, f1, f2}
    72  	if expected {
    73  		fieldList = append(fieldList, f3X)
    74  	} else {
    75  		fieldList = append(fieldList, f3)
    76  	}
    77  	fieldList = append(fieldList, f4, f5, f6)
    78  
    79  	if addFieldMeta {
    80  		for idx := range fieldList {
    81  			fieldList[idx].Metadata = arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{strconv.Itoa(idx + 1)})
    82  		}
    83  	}
    84  	arrsc := arrow.NewSchema(fieldList, nil)
    85  
    86  	d32Values := []arrow.Date32{1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000}
    87  	ts64nsValues := []arrow.Timestamp{1489269000000, 1489270000000, 1489271000000, 1489272000000, 1489272000000, 1489273000000}
    88  	ts64usValues := []arrow.Timestamp{1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000}
    89  	ts64msValues := []arrow.Timestamp{1489269, 1489270, 1489271, 1489272, 1489272, 1489273}
    90  	t32Values := []arrow.Time32{1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000}
    91  	t64nsValues := []arrow.Time64{1489269000000, 1489270000000, 1489271000000, 1489272000000, 1489272000000, 1489273000000}
    92  	t64usValues := []arrow.Time64{1489269000, 1489270000, 1489271000, 1489272000, 1489272000, 1489273000}
    93  
    94  	builders := make([]array.Builder, 0, len(fieldList))
    95  	for _, f := range fieldList {
    96  		bldr := array.NewBuilder(mem, f.Type)
    97  		defer bldr.Release()
    98  		builders = append(builders, bldr)
    99  	}
   100  
   101  	builders[0].(*array.Date32Builder).AppendValues(d32Values, isValid)
   102  	builders[1].(*array.TimestampBuilder).AppendValues(ts64msValues, isValid)
   103  	builders[2].(*array.TimestampBuilder).AppendValues(ts64usValues, isValid)
   104  	if expected {
   105  		builders[3].(*array.TimestampBuilder).AppendValues(ts64usValues, isValid)
   106  	} else {
   107  		builders[3].(*array.TimestampBuilder).AppendValues(ts64nsValues, isValid)
   108  	}
   109  	builders[4].(*array.Time32Builder).AppendValues(t32Values, isValid)
   110  	builders[5].(*array.Time64Builder).AppendValues(t64usValues, isValid)
   111  	builders[6].(*array.Time64Builder).AppendValues(t64nsValues, isValid)
   112  
   113  	cols := make([]arrow.Column, 0, len(fieldList))
   114  	for idx, field := range fieldList {
   115  		arr := builders[idx].NewArray()
   116  		defer arr.Release()
   117  
   118  		chunked := arrow.NewChunked(field.Type, []arrow.Array{arr})
   119  		defer chunked.Release()
   120  		col := arrow.NewColumn(field, chunked)
   121  		defer col.Release()
   122  		cols = append(cols, *col)
   123  	}
   124  
   125  	return array.NewTable(arrsc, cols, int64(len(isValid)))
   126  }
   127  
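         // TestWriteArrowCols writes each column of the date/time table through pqarrow.NewArrowColumnWriter and
         // reads the raw column chunks back with the low-level file reader, checking the values, the definition
         // levels, and that nanosecond timestamps come back as microseconds under Parquet format version 2.4.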
   128  func TestWriteArrowCols(t *testing.T) {
   129  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   130  	defer mem.AssertSize(t, 0)
   131  
   132  	tbl := makeDateTimeTypesTable(mem, false, false)
   133  	defer tbl.Release()
   134  
   135  	psc, err := pqarrow.ToParquet(tbl.Schema(), nil, pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)))
   136  	require.NoError(t, err)
   137  
   138  	manifest, err := pqarrow.NewSchemaManifest(psc, nil, nil)
   139  	require.NoError(t, err)
   140  
   141  	sink := encoding.NewBufferWriter(0, mem)
   142  	defer sink.Release()
   143  	writer := file.NewParquetWriter(sink, psc.Root(), file.WithWriterProps(parquet.NewWriterProperties(parquet.WithVersion(parquet.V2_4))))
   144  
   145  	srgw := writer.AppendRowGroup()
   146  	ctx := pqarrow.NewArrowWriteContext(context.TODO(), nil)
   147  
   148  	for i := int64(0); i < tbl.NumCols(); i++ {
   149  		acw, err := pqarrow.NewArrowColumnWriter(tbl.Column(int(i)).Data(), 0, tbl.NumRows(), manifest, srgw, int(i))
   150  		require.NoError(t, err)
   151  		require.NoError(t, acw.Write(ctx))
   152  	}
   153  	require.NoError(t, srgw.Close())
   154  	require.NoError(t, writer.Close())
   155  
   156  	expected := makeDateTimeTypesTable(mem, true, false)
   157  	defer expected.Release()
   158  
   159  	reader, err := file.NewParquetReader(bytes.NewReader(sink.Bytes()))
   160  	require.NoError(t, err)
   161  
   162  	assert.EqualValues(t, expected.NumCols(), reader.MetaData().Schema.NumColumns())
   163  	assert.EqualValues(t, expected.NumRows(), reader.NumRows())
   164  	assert.EqualValues(t, 1, reader.NumRowGroups())
   165  
   166  	rgr := reader.RowGroup(0)
   167  
   168  	for i := 0; i < int(expected.NumCols()); i++ {
   169  		var (
   170  			total        int64
   171  			read         int
   172  			defLevelsOut = make([]int16, int(expected.NumRows()))
   173  			arr          = expected.Column(i).Data().Chunk(0)
   174  		)
   175  		switch expected.Schema().Field(i).Type.(arrow.FixedWidthDataType).BitWidth() {
   176  		case 32:
   177  			col, err := rgr.Column(i)
   178  			assert.NoError(t, err)
   179  			colReader := col.(*file.Int32ColumnChunkReader)
   180  			vals := make([]int32, int(expected.NumRows()))
   181  			total, read, err = colReader.ReadBatch(expected.NumRows(), vals, defLevelsOut, nil)
   182  			require.NoError(t, err)
   183  
   184  			nulls := 0
   185  			for j := 0; j < arr.Len(); j++ {
   186  				if arr.IsNull(j) {
   187  					nulls++
   188  					continue
   189  				}
   190  
   191  				switch v := arr.(type) {
   192  				case *array.Date32:
   193  					assert.EqualValues(t, v.Value(j), vals[j-nulls])
   194  				case *array.Time32:
   195  					assert.EqualValues(t, v.Value(j), vals[j-nulls])
   196  				}
   197  			}
   198  		case 64:
   199  			col, err := rgr.Column(i)
   200  			assert.NoError(t, err)
   201  			colReader := col.(*file.Int64ColumnChunkReader)
   202  			vals := make([]int64, int(expected.NumRows()))
   203  			total, read, err = colReader.ReadBatch(expected.NumRows(), vals, defLevelsOut, nil)
   204  			require.NoError(t, err)
   205  
   206  			nulls := 0
   207  			for j := 0; j < arr.Len(); j++ {
   208  				if arr.IsNull(j) {
   209  					nulls++
   210  					continue
   211  				}
   212  
   213  				switch v := arr.(type) {
   214  				case *array.Date64:
   215  					assert.EqualValues(t, v.Value(j), vals[j-nulls])
   216  				case *array.Time64:
   217  					assert.EqualValues(t, v.Value(j), vals[j-nulls])
   218  				case *array.Timestamp:
   219  					assert.EqualValues(t, v.Value(j), vals[j-nulls])
   220  				}
   221  			}
   222  		}
   223  		assert.EqualValues(t, expected.NumRows(), total)
   224  		assert.EqualValues(t, expected.NumRows()-1, read)
   225  		assert.Equal(t, []int16{1, 1, 1, 0, 1, 1}, defLevelsOut)
   226  	}
   227  }
   228  
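         // TestWriteArrowInt96 enables WithDeprecatedInt96Timestamps and verifies that the nanosecond timestamp
         // column is written as physical INT96 and converts back to the original nanosecond values.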
   229  func TestWriteArrowInt96(t *testing.T) {
   230  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   231  	defer mem.AssertSize(t, 0)
   232  
   233  	tbl := makeDateTimeTypesTable(mem, false, false)
   234  	defer tbl.Release()
   235  
   236  	props := pqarrow.NewArrowWriterProperties(pqarrow.WithDeprecatedInt96Timestamps(true), pqarrow.WithAllocator(mem))
   237  
   238  	psc, err := pqarrow.ToParquet(tbl.Schema(), nil, props)
   239  	require.NoError(t, err)
   240  
   241  	manifest, err := pqarrow.NewSchemaManifest(psc, nil, nil)
   242  	require.NoError(t, err)
   243  
   244  	sink := encoding.NewBufferWriter(0, mem)
   245  	defer sink.Release()
   246  
   247  	writer := file.NewParquetWriter(sink, psc.Root(), file.WithWriterProps(parquet.NewWriterProperties(parquet.WithAllocator(mem))))
   248  
   249  	srgw := writer.AppendRowGroup()
   250  	ctx := pqarrow.NewArrowWriteContext(context.TODO(), &props)
   251  
   252  	for i := int64(0); i < tbl.NumCols(); i++ {
   253  		acw, err := pqarrow.NewArrowColumnWriter(tbl.Column(int(i)).Data(), 0, tbl.NumRows(), manifest, srgw, int(i))
   254  		require.NoError(t, err)
   255  		require.NoError(t, acw.Write(ctx))
   256  	}
   257  	require.NoError(t, srgw.Close())
   258  	require.NoError(t, writer.Close())
   259  
   260  	expected := makeDateTimeTypesTable(mem, false, false)
   261  	defer expected.Release()
   262  
   263  	reader, err := file.NewParquetReader(bytes.NewReader(sink.Bytes()))
   264  	require.NoError(t, err)
   265  
   266  	assert.EqualValues(t, expected.NumCols(), reader.MetaData().Schema.NumColumns())
   267  	assert.EqualValues(t, expected.NumRows(), reader.NumRows())
   268  	assert.EqualValues(t, 1, reader.NumRowGroups())
   269  
   270  	rgr := reader.RowGroup(0)
   271  	tsRdr, err := rgr.Column(3)
   272  	assert.NoError(t, err)
   273  	assert.Equal(t, parquet.Types.Int96, tsRdr.Type())
   274  
   275  	rdr := tsRdr.(*file.Int96ColumnChunkReader)
   276  	vals := make([]parquet.Int96, expected.NumRows())
   277  	defLevels := make([]int16, int(expected.NumRows()))
   278  
   279  	total, read, _ := rdr.ReadBatch(expected.NumRows(), vals, defLevels, nil)
   280  	assert.EqualValues(t, expected.NumRows(), total)
   281  	assert.EqualValues(t, expected.NumRows()-1, read)
   282  	assert.Equal(t, []int16{1, 1, 1, 0, 1, 1}, defLevels)
   283  
   284  	data := expected.Column(3).Data().Chunk(0).(*array.Timestamp)
   285  	assert.EqualValues(t, data.Value(0), vals[0].ToTime().UnixNano())
   286  	assert.EqualValues(t, data.Value(1), vals[1].ToTime().UnixNano())
   287  	assert.EqualValues(t, data.Value(2), vals[2].ToTime().UnixNano())
   288  	assert.EqualValues(t, data.Value(4), vals[3].ToTime().UnixNano())
   289  	assert.EqualValues(t, data.Value(5), vals[4].ToTime().UnixNano())
   290  }
   291  
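         // writeTableToBuffer serializes tbl to an in-memory Parquet file (format version 1.0), splitting it into
         // row groups of at most rowGroupSize rows, and returns the resulting buffer.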
   292  func writeTableToBuffer(t *testing.T, mem memory.Allocator, tbl arrow.Table, rowGroupSize int64, props pqarrow.ArrowWriterProperties) *memory.Buffer {
   293  	sink := encoding.NewBufferWriter(0, mem)
   294  	defer sink.Release()
   295  	wrprops := parquet.NewWriterProperties(parquet.WithVersion(parquet.V1_0))
   296  	psc, err := pqarrow.ToParquet(tbl.Schema(), wrprops, props)
   297  	require.NoError(t, err)
   298  
   299  	manifest, err := pqarrow.NewSchemaManifest(psc, nil, nil)
   300  	require.NoError(t, err)
   301  
   302  	writer := file.NewParquetWriter(sink, psc.Root(), file.WithWriterProps(wrprops))
   303  	ctx := pqarrow.NewArrowWriteContext(context.TODO(), &props)
   304  
   305  	offset := int64(0)
   306  	for offset < tbl.NumRows() {
   307  		sz := utils.Min(rowGroupSize, tbl.NumRows()-offset)
   308  		srgw := writer.AppendRowGroup()
   309  		for i := 0; i < int(tbl.NumCols()); i++ {
   310  			col := tbl.Column(i)
   311  			acw, err := pqarrow.NewArrowColumnWriter(col.Data(), offset, sz, manifest, srgw, i)
   312  			require.NoError(t, err)
   313  			require.NoError(t, acw.Write(ctx))
   314  		}
   315  		srgw.Close()
   316  		offset += sz
   317  	}
   318  	writer.Close()
   319  
   320  	return sink.Finish()
   321  }
   322  
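         // simpleRoundTrip writes tbl to a buffer, reads every column back through pqarrow.FileReader, and checks
         // that each original chunk equals the corresponding slice of the data that was read back.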
   323  func simpleRoundTrip(t *testing.T, tbl arrow.Table, rowGroupSize int64) {
   324  	t.Helper()
   325  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   326  	defer mem.AssertSize(t, 0)
   327  
   328  	buf := writeTableToBuffer(t, mem, tbl, rowGroupSize, pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)))
   329  	defer buf.Release()
   330  
   331  	rdr, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
   332  	require.NoError(t, err)
   333  
   334  	ardr, err := pqarrow.NewFileReader(rdr, pqarrow.ArrowReadProperties{}, mem)
   335  	require.NoError(t, err)
   336  
   337  	for i := 0; i < int(tbl.NumCols()); i++ {
   338  		crdr, err := ardr.GetColumn(context.TODO(), i)
   339  		require.NoError(t, err)
   340  
   341  		chunked, err := crdr.NextBatch(tbl.NumRows())
   342  		require.NoError(t, err)
   343  		defer chunked.Release()
   344  
   345  		require.EqualValues(t, tbl.NumRows(), chunked.Len())
   346  
   347  		chunkList := tbl.Column(i).Data().Chunks()
   348  		offset := int64(0)
   349  		for _, chnk := range chunkList {
   350  			slc := array.NewChunkedSlice(chunked, offset, offset+int64(chnk.Len()))
   351  			defer slc.Release()
   352  
   353  			assert.EqualValues(t, chnk.Len(), slc.Len())
   354  			if len(slc.Chunks()) == 1 {
   355  				offset += int64(chnk.Len())
   356  				assert.True(t, array.Equal(chnk, slc.Chunk(0)))
   357  			}
   358  		}
   359  		crdr.Release()
   360  	}
   361  }
   362  
   363  func TestWriteKeyValueMetadata(t *testing.T) {
   364  	kv := map[string]string{
   365  		"key1": "value1",
   366  		"key2": "value2",
   367  		"key3": "value3",
   368  	}
   369  
   370  	sc := arrow.NewSchema([]arrow.Field{
   371  		{Name: "int32", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
   372  	}, nil)
   373  	bldr := array.NewRecordBuilder(memory.DefaultAllocator, sc)
   374  	defer bldr.Release()
   375  	for _, b := range bldr.Fields() {
   376  		b.AppendNull()
   377  	}
   378  
   379  	rec := bldr.NewRecord()
   380  	defer rec.Release()
   381  
   382  	props := parquet.NewWriterProperties(
   383  		parquet.WithVersion(parquet.V1_0),
   384  	)
   385  	var buf bytes.Buffer
   386  	fw, err := pqarrow.NewFileWriter(sc, &buf, props, pqarrow.DefaultWriterProps())
   387  	require.NoError(t, err)
   388  	err = fw.Write(rec)
   389  	require.NoError(t, err)
   390  
   391  	for key, value := range kv {
   392  		require.NoError(t, fw.AppendKeyValueMetadata(key, value))
   393  	}
   394  
   395  	err = fw.Close()
   396  	require.NoError(t, err)
   397  
   398  	reader, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
   399  	require.NoError(t, err)
   400  
   401  	for key, value := range kv {
   402  		got := reader.MetaData().KeyValueMetadata().FindValue(key)
   403  		require.NotNil(t, got)
   404  		assert.Equal(t, value, *got)
   405  	}
   406  }
   407  
   408  func TestWriteEmptyLists(t *testing.T) {
   409  	sc := arrow.NewSchema([]arrow.Field{
   410  		{Name: "f1", Type: arrow.ListOf(arrow.FixedWidthTypes.Date32)},
   411  		{Name: "f2", Type: arrow.ListOf(arrow.FixedWidthTypes.Date64)},
   412  		{Name: "f3", Type: arrow.ListOf(arrow.FixedWidthTypes.Timestamp_us)},
   413  		{Name: "f4", Type: arrow.ListOf(arrow.FixedWidthTypes.Timestamp_ms)},
   414  		{Name: "f5", Type: arrow.ListOf(arrow.FixedWidthTypes.Time32ms)},
   415  		{Name: "f6", Type: arrow.ListOf(arrow.FixedWidthTypes.Time64ns)},
   416  		{Name: "f7", Type: arrow.ListOf(arrow.FixedWidthTypes.Time64us)},
   417  	}, nil)
   418  	bldr := array.NewRecordBuilder(memory.DefaultAllocator, sc)
   419  	defer bldr.Release()
   420  	for _, b := range bldr.Fields() {
   421  		b.AppendNull()
   422  	}
   423  
   424  	rec := bldr.NewRecord()
   425  	defer rec.Release()
   426  
   427  	props := parquet.NewWriterProperties(
   428  		parquet.WithVersion(parquet.V1_0),
   429  	)
   430  	arrprops := pqarrow.DefaultWriterProps()
   431  	var buf bytes.Buffer
   432  	fw, err := pqarrow.NewFileWriter(sc, &buf, props, arrprops)
   433  	require.NoError(t, err)
   434  	err = fw.Write(rec)
   435  	require.NoError(t, err)
   436  	err = fw.Close()
   437  	require.NoError(t, err)
   438  }
   439  
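         // TestArrowReadWriteTableChunkedCols round-trips a single column whose data is split into chunks of
         // differing sizes, using two different row-group sizes.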
   440  func TestArrowReadWriteTableChunkedCols(t *testing.T) {
   441  	chunkSizes := []int{2, 4, 10, 2}
   442  	const totalLen = int64(18)
   443  
   444  	rng := testutils.NewRandomArrayGenerator(0)
   445  
   446  	arr := rng.Int32(totalLen, 0, math.MaxInt32/2, 0.9)
   447  	defer arr.Release()
   448  
   449  	offset := int64(0)
   450  	chunks := make([]arrow.Array, 0)
   451  	for _, chnksize := range chunkSizes {
   452  		chk := array.NewSlice(arr, offset, offset+int64(chnksize))
   453  		defer chk.Release()
   454  		defer chk.Release() // for NewChunked below
    455  		chunks = append(chunks, chk)
         		offset += int64(chnksize) // advance so the chunks are consecutive, non-overlapping slices of arr
   456  	}
   457  
   458  	sc := arrow.NewSchema([]arrow.Field{{Name: "field", Type: arr.DataType(), Nullable: true}}, nil)
   459  
   460  	chk := arrow.NewChunked(arr.DataType(), chunks)
   461  	defer chk.Release()
   462  
   463  	tbl := array.NewTable(sc, []arrow.Column{*arrow.NewColumn(sc.Field(0), chk)}, -1)
   464  	defer tbl.Release()
   465  
   466  	simpleRoundTrip(t, tbl, 2)
   467  	simpleRoundTrip(t, tbl, 10)
   468  }
   469  
    470  // getLogicalType maps an Arrow data type to the Parquet logical type we expect the exported
    471  // schema-generation functions to produce, so the tests below can check their results against it.
   472  func getLogicalType(typ arrow.DataType) schema.LogicalType {
   473  	switch typ.ID() {
   474  	case arrow.DICTIONARY:
   475  		return getLogicalType(typ.(*arrow.DictionaryType).ValueType)
   476  	case arrow.INT8:
   477  		return schema.NewIntLogicalType(8, true)
   478  	case arrow.UINT8:
   479  		return schema.NewIntLogicalType(8, false)
   480  	case arrow.INT16:
   481  		return schema.NewIntLogicalType(16, true)
   482  	case arrow.UINT16:
   483  		return schema.NewIntLogicalType(16, false)
   484  	case arrow.INT32:
   485  		return schema.NewIntLogicalType(32, true)
   486  	case arrow.UINT32:
   487  		return schema.NewIntLogicalType(32, false)
   488  	case arrow.INT64:
   489  		return schema.NewIntLogicalType(64, true)
   490  	case arrow.UINT64:
   491  		return schema.NewIntLogicalType(64, false)
   492  	case arrow.STRING, arrow.LARGE_STRING:
   493  		return schema.StringLogicalType{}
   494  	case arrow.DATE32:
   495  		return schema.DateLogicalType{}
   496  	case arrow.DATE64:
   497  		return schema.DateLogicalType{}
   498  	case arrow.TIMESTAMP:
   499  		ts := typ.(*arrow.TimestampType)
   500  		adjustedUTC := len(ts.TimeZone) == 0
   501  		switch ts.Unit {
   502  		case arrow.Microsecond:
   503  			return schema.NewTimestampLogicalType(adjustedUTC, schema.TimeUnitMicros)
   504  		case arrow.Millisecond:
   505  			return schema.NewTimestampLogicalType(adjustedUTC, schema.TimeUnitMillis)
   506  		case arrow.Nanosecond:
   507  			return schema.NewTimestampLogicalType(adjustedUTC, schema.TimeUnitNanos)
   508  		default:
   509  			panic("only milli, micro and nano units supported for arrow timestamp")
   510  		}
   511  	case arrow.TIME32:
   512  		return schema.NewTimeLogicalType(false, schema.TimeUnitMillis)
   513  	case arrow.TIME64:
   514  		ts := typ.(*arrow.Time64Type)
   515  		switch ts.Unit {
   516  		case arrow.Microsecond:
   517  			return schema.NewTimeLogicalType(false, schema.TimeUnitMicros)
   518  		case arrow.Nanosecond:
   519  			return schema.NewTimeLogicalType(false, schema.TimeUnitNanos)
   520  		default:
   521  			panic("only micro and nano seconds are supported for arrow TIME64")
   522  		}
   523  	case arrow.DECIMAL, arrow.DECIMAL256:
   524  		dec := typ.(arrow.DecimalType)
   525  		return schema.NewDecimalLogicalType(dec.GetPrecision(), dec.GetScale())
   526  	}
   527  	return schema.NoLogicalType{}
   528  }
   529  
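         // getPhysicalType maps an Arrow data type to the Parquet physical type expected for it.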
   530  func getPhysicalType(typ arrow.DataType) parquet.Type {
   531  	switch typ.ID() {
   532  	case arrow.DICTIONARY:
   533  		return getPhysicalType(typ.(*arrow.DictionaryType).ValueType)
   534  	case arrow.BOOL:
   535  		return parquet.Types.Boolean
   536  	case arrow.UINT8, arrow.INT8, arrow.UINT16, arrow.INT16, arrow.UINT32, arrow.INT32:
   537  		return parquet.Types.Int32
   538  	case arrow.INT64, arrow.UINT64:
   539  		return parquet.Types.Int64
   540  	case arrow.FLOAT32:
   541  		return parquet.Types.Float
   542  	case arrow.FLOAT64:
   543  		return parquet.Types.Double
   544  	case arrow.BINARY, arrow.LARGE_BINARY, arrow.STRING, arrow.LARGE_STRING:
   545  		return parquet.Types.ByteArray
   546  	case arrow.FIXED_SIZE_BINARY, arrow.DECIMAL:
   547  		return parquet.Types.FixedLenByteArray
   548  	case arrow.DATE32:
   549  		return parquet.Types.Int32
   550  	case arrow.DATE64:
   551  		// convert to date32 internally
   552  		return parquet.Types.Int32
   553  	case arrow.TIME32:
   554  		return parquet.Types.Int32
   555  	case arrow.TIME64, arrow.TIMESTAMP:
   556  		return parquet.Types.Int64
   557  	default:
   558  		return parquet.Types.Int32
   559  	}
   560  }
   561  
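         // Constant test values used when generating test column data.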
   562  const (
   563  	boolTestValue = true
   564  	uint8TestVal  = uint8(64)
   565  	int8TestVal   = int8(-64)
   566  	uint16TestVal = uint16(1024)
   567  	int16TestVal  = int16(-1024)
   568  	uint32TestVal = uint32(1024)
   569  	int32TestVal  = int32(-1024)
   570  	uint64TestVal = uint64(1024)
   571  	int64TestVal  = int64(-1024)
   572  	tsTestValue   = arrow.Timestamp(14695634030000)
   573  	date32TestVal = arrow.Date32(170000)
   574  	floatTestVal  = float32(2.1)
   575  	doubleTestVal = float64(4.2)
   576  	strTestVal    = "Test"
   577  
   578  	smallSize = 100
   579  )
   580  
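         // ParquetIOTestSuite groups the Arrow <-> Parquet round-trip tests for primitive, nested, and logical types.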
   581  type ParquetIOTestSuite struct {
   582  	suite.Suite
   583  }
   584  
   585  func (ps *ParquetIOTestSuite) SetupTest() {
   586  	ps.NoError(arrow.RegisterExtensionType(types.NewUUIDType()))
   587  }
   588  
   589  func (ps *ParquetIOTestSuite) TearDownTest() {
   590  	if arrow.GetExtensionType("uuid") != nil {
   591  		ps.NoError(arrow.UnregisterExtensionType("uuid"))
   592  	}
   593  }
   594  
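         // makeSimpleSchema builds a one-column Parquet schema ("column1") whose logical and physical types are
         // derived from typ, including the byte width needed for fixed-size binary and decimal columns.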
   595  func (ps *ParquetIOTestSuite) makeSimpleSchema(typ arrow.DataType, rep parquet.Repetition) *schema.GroupNode {
   596  	byteWidth := int32(-1)
   597  
   598  	switch typ := typ.(type) {
   599  	case *arrow.FixedSizeBinaryType:
   600  		byteWidth = int32(typ.ByteWidth)
   601  	case arrow.DecimalType:
   602  		byteWidth = pqarrow.DecimalSize(typ.GetPrecision())
   603  	case *arrow.DictionaryType:
   604  		valuesType := typ.ValueType
   605  		switch dt := valuesType.(type) {
   606  		case *arrow.FixedSizeBinaryType:
   607  			byteWidth = int32(dt.ByteWidth)
   608  		case arrow.DecimalType:
   609  			byteWidth = pqarrow.DecimalSize(dt.GetPrecision())
   610  		}
   611  	}
   612  
   613  	pnode, _ := schema.NewPrimitiveNodeLogical("column1", rep, getLogicalType(typ), getPhysicalType(typ), int(byteWidth), -1)
   614  	return schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Required, schema.FieldList{pnode}, -1))
   615  }
   616  
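         // makePrimitiveTestCol returns an array of length size filled with the constant test value for the given
         // primitive type, or nil if the type is not handled here.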
   617  func (ps *ParquetIOTestSuite) makePrimitiveTestCol(mem memory.Allocator, size int, typ arrow.DataType) arrow.Array {
   618  	switch typ.ID() {
   619  	case arrow.BOOL:
   620  		bldr := array.NewBooleanBuilder(mem)
   621  		defer bldr.Release()
   622  		for i := 0; i < size; i++ {
   623  			bldr.Append(boolTestValue)
   624  		}
   625  		return bldr.NewArray()
   626  	case arrow.INT8:
   627  		bldr := array.NewInt8Builder(mem)
   628  		defer bldr.Release()
   629  		for i := 0; i < size; i++ {
   630  			bldr.Append(int8TestVal)
   631  		}
   632  		return bldr.NewArray()
   633  	case arrow.UINT8:
   634  		bldr := array.NewUint8Builder(mem)
   635  		defer bldr.Release()
   636  		for i := 0; i < size; i++ {
   637  			bldr.Append(uint8TestVal)
   638  		}
   639  		return bldr.NewArray()
   640  	case arrow.INT16:
   641  		bldr := array.NewInt16Builder(mem)
   642  		defer bldr.Release()
   643  		for i := 0; i < size; i++ {
   644  			bldr.Append(int16TestVal)
   645  		}
   646  		return bldr.NewArray()
   647  	case arrow.UINT16:
   648  		bldr := array.NewUint16Builder(mem)
   649  		defer bldr.Release()
   650  		for i := 0; i < size; i++ {
   651  			bldr.Append(uint16TestVal)
   652  		}
   653  		return bldr.NewArray()
   654  	case arrow.INT32:
   655  		bldr := array.NewInt32Builder(mem)
   656  		defer bldr.Release()
   657  		for i := 0; i < size; i++ {
   658  			bldr.Append(int32TestVal)
   659  		}
   660  		return bldr.NewArray()
   661  	case arrow.UINT32:
   662  		bldr := array.NewUint32Builder(mem)
   663  		defer bldr.Release()
   664  		for i := 0; i < size; i++ {
   665  			bldr.Append(uint32TestVal)
   666  		}
   667  		return bldr.NewArray()
   668  	case arrow.INT64:
   669  		bldr := array.NewInt64Builder(mem)
   670  		defer bldr.Release()
   671  		for i := 0; i < size; i++ {
   672  			bldr.Append(int64TestVal)
   673  		}
   674  		return bldr.NewArray()
   675  	case arrow.UINT64:
   676  		bldr := array.NewUint64Builder(mem)
   677  		defer bldr.Release()
   678  		for i := 0; i < size; i++ {
   679  			bldr.Append(uint64TestVal)
   680  		}
   681  		return bldr.NewArray()
   682  	case arrow.FLOAT32:
   683  		bldr := array.NewFloat32Builder(mem)
   684  		defer bldr.Release()
   685  		for i := 0; i < size; i++ {
   686  			bldr.Append(floatTestVal)
   687  		}
   688  		return bldr.NewArray()
   689  	case arrow.FLOAT64:
   690  		bldr := array.NewFloat64Builder(mem)
   691  		defer bldr.Release()
   692  		for i := 0; i < size; i++ {
   693  			bldr.Append(doubleTestVal)
   694  		}
   695  		return bldr.NewArray()
   696  	}
   697  	return nil
   698  }
   699  
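         // makeTestFile writes arr as numChunks row groups via pqarrow.WriteArrowToColumn and returns the
         // serialized Parquet file contents.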
   700  func (ps *ParquetIOTestSuite) makeTestFile(mem memory.Allocator, typ arrow.DataType, arr arrow.Array, numChunks int) []byte {
   701  	sc := ps.makeSimpleSchema(typ, parquet.Repetitions.Required)
   702  	sink := encoding.NewBufferWriter(0, mem)
   703  	defer sink.Release()
   704  	writer := file.NewParquetWriter(sink, sc, file.WithWriterProps(parquet.NewWriterProperties(parquet.WithAllocator(mem))))
   705  
   706  	props := pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem))
   707  	ctx := pqarrow.NewArrowWriteContext(context.TODO(), &props)
   708  	rowGroupSize := arr.Len() / numChunks
   709  
   710  	for i := 0; i < numChunks; i++ {
   711  		rgw := writer.AppendRowGroup()
   712  		cw, err := rgw.NextColumn()
   713  		ps.NoError(err)
   714  
   715  		start := i * rowGroupSize
   716  		slc := array.NewSlice(arr, int64(start), int64(start+rowGroupSize))
   717  		defer slc.Release()
   718  		ps.NoError(pqarrow.WriteArrowToColumn(ctx, cw, slc, nil, nil, false))
   719  		ps.NoError(cw.Close())
   720  		ps.NoError(rgw.Close())
   721  	}
   722  	ps.NoError(writer.Close())
   723  	buf := sink.Finish()
   724  	defer buf.Release()
   725  	return buf.Bytes()
   726  }
   727  
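         // createReader opens the given Parquet bytes and wraps them in a pqarrow.FileReader using mem.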
   728  func (ps *ParquetIOTestSuite) createReader(mem memory.Allocator, data []byte) *pqarrow.FileReader {
   729  	rdr, err := file.NewParquetReader(bytes.NewReader(data), file.WithReadProps(parquet.NewReaderProperties(mem)))
   730  	ps.NoError(err)
   731  
   732  	reader, err := pqarrow.NewFileReader(rdr, pqarrow.ArrowReadProperties{}, mem)
   733  	ps.NoError(err)
   734  	return reader
   735  }
   736  
   737  func (ps *ParquetIOTestSuite) readTable(rdr *pqarrow.FileReader) arrow.Table {
   738  	tbl, err := rdr.ReadTable(context.TODO())
   739  	ps.NoError(err)
   740  	ps.NotNil(tbl)
   741  	return tbl
   742  }
   743  
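         // checkSingleColumnRequiredTableRead writes a required single-column file in numChunks row groups and
         // verifies that ReadTable returns the original values as a single chunk.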
   744  func (ps *ParquetIOTestSuite) checkSingleColumnRequiredTableRead(mem memory.Allocator, typ arrow.DataType, numChunks int) {
   745  	values := ps.makePrimitiveTestCol(mem, smallSize, typ)
   746  	defer values.Release()
   747  
   748  	data := ps.makeTestFile(mem, typ, values, numChunks)
   749  	reader := ps.createReader(mem, data)
   750  
   751  	tbl := ps.readTable(reader)
   752  	defer tbl.Release()
   753  
   754  	ps.EqualValues(1, tbl.NumCols())
   755  	ps.EqualValues(smallSize, tbl.NumRows())
   756  
   757  	chunked := tbl.Column(0).Data()
   758  	ps.Len(chunked.Chunks(), 1)
   759  	ps.True(array.Equal(values, chunked.Chunk(0)))
   760  }
   761  
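         // checkSingleColumnRead performs the same check as above, but reads through GetColumn/NextBatch instead
         // of ReadTable.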
   762  func (ps *ParquetIOTestSuite) checkSingleColumnRead(mem memory.Allocator, typ arrow.DataType, numChunks int) {
   763  	values := ps.makePrimitiveTestCol(mem, smallSize, typ)
   764  	defer values.Release()
   765  
   766  	data := ps.makeTestFile(mem, typ, values, numChunks)
   767  	reader := ps.createReader(mem, data)
   768  
   769  	cr, err := reader.GetColumn(context.TODO(), 0)
   770  	ps.NoError(err)
   771  	defer cr.Release()
   772  
   773  	chunked, err := cr.NextBatch(smallSize)
   774  	ps.NoError(err)
   775  	defer chunked.Release()
   776  
   777  	ps.Len(chunked.Chunks(), 1)
   778  	ps.True(array.Equal(values, chunked.Chunk(0)))
   779  }
   780  
   781  func (ps *ParquetIOTestSuite) TestDateTimeTypesReadWriteTable() {
   782  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   783  	defer mem.AssertSize(ps.T(), 0)
   784  
   785  	toWrite := makeDateTimeTypesTable(mem, false, true)
   786  	defer toWrite.Release()
   787  	buf := writeTableToBuffer(ps.T(), mem, toWrite, toWrite.NumRows(), pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)))
   788  	defer buf.Release()
   789  
   790  	reader := ps.createReader(mem, buf.Bytes())
   791  	tbl := ps.readTable(reader)
   792  	defer tbl.Release()
   793  
   794  	expected := makeDateTimeTypesTable(mem, true, true)
   795  	defer expected.Release()
   796  
   797  	ps.Equal(expected.NumCols(), tbl.NumCols())
   798  	ps.Equal(expected.NumRows(), tbl.NumRows())
   799  	ps.Truef(expected.Schema().Equal(tbl.Schema()), "expected schema: %s\ngot schema: %s", expected.Schema(), tbl.Schema())
   800  
   801  	for i := 0; i < int(expected.NumCols()); i++ {
   802  		exChunk := expected.Column(i).Data()
   803  		tblChunk := tbl.Column(i).Data()
   804  
   805  		ps.Equal(len(exChunk.Chunks()), len(tblChunk.Chunks()))
   806  		ps.Truef(array.Equal(exChunk.Chunk(0), tblChunk.Chunk(0)), "expected %s\ngot %s", exChunk.Chunk(0), tblChunk.Chunk(0))
   807  	}
   808  }
   809  
   810  func (ps *ParquetIOTestSuite) TestDateTimeTypesWithInt96ReadWriteTable() {
   811  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   812  	defer mem.AssertSize(ps.T(), 0)
   813  
   814  	expected := makeDateTimeTypesTable(mem, false, true)
   815  	defer expected.Release()
   816  	buf := writeTableToBuffer(ps.T(), mem, expected, expected.NumRows(), pqarrow.NewArrowWriterProperties(pqarrow.WithDeprecatedInt96Timestamps(true)))
   817  	defer buf.Release()
   818  
   819  	reader := ps.createReader(mem, buf.Bytes())
   820  	tbl := ps.readTable(reader)
   821  	defer tbl.Release()
   822  
   823  	ps.Equal(expected.NumCols(), tbl.NumCols())
   824  	ps.Equal(expected.NumRows(), tbl.NumRows())
   825  	ps.Truef(expected.Schema().Equal(tbl.Schema()), "expected schema: %s\ngot schema: %s", expected.Schema(), tbl.Schema())
   826  
   827  	for i := 0; i < int(expected.NumCols()); i++ {
   828  		exChunk := expected.Column(i).Data()
   829  		tblChunk := tbl.Column(i).Data()
   830  
   831  		ps.Equal(len(exChunk.Chunks()), len(tblChunk.Chunks()))
   832  		ps.Truef(array.Equal(exChunk.Chunk(0), tblChunk.Chunk(0)), "expected %s\ngot %s", exChunk.Chunk(0), tblChunk.Chunk(0))
   833  	}
   834  }
   835  
   836  func (ps *ParquetIOTestSuite) TestLargeBinaryReadWriteTable() {
   837  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   838  	defer mem.AssertSize(ps.T(), 0)
   839  
    840  	// Because roundTripTable is called with storeSchema enabled below, the Arrow schema is stored in the
    841  	// file metadata, so the LargeString/LargeBinary columns are expected to read back with their large types intact.
   842  	lsBldr := array.NewLargeStringBuilder(mem)
   843  	defer lsBldr.Release()
   844  	lbBldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary)
   845  	defer lbBldr.Release()
   846  
   847  	for i := 0; i < smallSize; i++ {
   848  		s := strconv.FormatInt(int64(i), 10)
   849  		lsBldr.Append(s)
   850  		lbBldr.Append([]byte(s))
   851  	}
   852  
   853  	lsValues := lsBldr.NewArray()
   854  	defer lsValues.Release()
   855  	lbValues := lbBldr.NewArray()
   856  	defer lbValues.Release()
   857  
   858  	lsField := arrow.Field{Name: "large_string", Type: arrow.BinaryTypes.LargeString, Nullable: true}
   859  	lbField := arrow.Field{Name: "large_binary", Type: arrow.BinaryTypes.LargeBinary, Nullable: true}
   860  	expected := array.NewTable(
   861  		arrow.NewSchema([]arrow.Field{lsField, lbField}, nil),
   862  		[]arrow.Column{
   863  			*arrow.NewColumn(lsField, arrow.NewChunked(lsField.Type, []arrow.Array{lsValues})),
   864  			*arrow.NewColumn(lbField, arrow.NewChunked(lbField.Type, []arrow.Array{lbValues})),
   865  		},
   866  		-1,
   867  	)
   868  	defer lsValues.Release() // NewChunked
   869  	defer lbValues.Release() // NewChunked
   870  	defer expected.Release()
   871  	ps.roundTripTable(mem, expected, true)
   872  }
   873  
   874  func (ps *ParquetIOTestSuite) TestReadSingleColumnFile() {
   875  	types := []arrow.DataType{
   876  		arrow.FixedWidthTypes.Boolean,
   877  		arrow.PrimitiveTypes.Uint8,
   878  		arrow.PrimitiveTypes.Int8,
   879  		arrow.PrimitiveTypes.Uint16,
   880  		arrow.PrimitiveTypes.Int16,
   881  		arrow.PrimitiveTypes.Uint32,
   882  		arrow.PrimitiveTypes.Int32,
   883  		arrow.PrimitiveTypes.Uint64,
   884  		arrow.PrimitiveTypes.Int64,
   885  		arrow.PrimitiveTypes.Float32,
   886  		arrow.PrimitiveTypes.Float64,
   887  	}
   888  
   889  	nchunks := []int{1, 4}
   890  
   891  	for _, n := range nchunks {
   892  		for _, dt := range types {
   893  			ps.Run(fmt.Sprintf("%s %d chunks", dt.Name(), n), func() {
   894  				mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   895  				defer mem.AssertSize(ps.T(), 0)
   896  				ps.checkSingleColumnRead(mem, dt, n)
   897  			})
   898  		}
   899  	}
   900  }
   901  
   902  func (ps *ParquetIOTestSuite) TestSingleColumnRequiredRead() {
   903  	types := []arrow.DataType{
   904  		arrow.FixedWidthTypes.Boolean,
   905  		arrow.PrimitiveTypes.Uint8,
   906  		arrow.PrimitiveTypes.Int8,
   907  		arrow.PrimitiveTypes.Uint16,
   908  		arrow.PrimitiveTypes.Int16,
   909  		arrow.PrimitiveTypes.Uint32,
   910  		arrow.PrimitiveTypes.Int32,
   911  		arrow.PrimitiveTypes.Uint64,
   912  		arrow.PrimitiveTypes.Int64,
   913  		arrow.PrimitiveTypes.Float32,
   914  		arrow.PrimitiveTypes.Float64,
   915  	}
   916  
   917  	nchunks := []int{1, 4}
   918  
   919  	for _, n := range nchunks {
   920  		for _, dt := range types {
   921  			ps.Run(fmt.Sprintf("%s %d chunks", dt.Name(), n), func() {
   922  				mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   923  				defer mem.AssertSize(ps.T(), 0)
   924  
   925  				ps.checkSingleColumnRequiredTableRead(mem, dt, n)
   926  			})
   927  		}
   928  	}
   929  }
   930  
   931  func (ps *ParquetIOTestSuite) TestReadDecimals() {
   932  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   933  	defer mem.AssertSize(ps.T(), 0)
   934  
   935  	bigEndian := []parquet.ByteArray{
   936  		// 123456
   937  		[]byte{1, 226, 64},
   938  		// 987654
   939  		[]byte{15, 18, 6},
   940  		// -123456
   941  		[]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 29, 192},
   942  	}
   943  
   944  	bldr := array.NewDecimal128Builder(mem, &arrow.Decimal128Type{Precision: 6, Scale: 3})
   945  	defer bldr.Release()
   946  
   947  	bldr.Append(decimal128.FromU64(123456))
   948  	bldr.Append(decimal128.FromU64(987654))
   949  	bldr.Append(decimal128.FromI64(-123456))
   950  
   951  	expected := bldr.NewDecimal128Array()
   952  	defer expected.Release()
   953  
   954  	sc := schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Required, schema.FieldList{
   955  		schema.Must(schema.NewPrimitiveNodeLogical("decimals", parquet.Repetitions.Required, schema.NewDecimalLogicalType(6, 3), parquet.Types.ByteArray, -1, -1)),
   956  	}, -1))
   957  
   958  	sink := encoding.NewBufferWriter(0, mem)
   959  	defer sink.Release()
   960  	writer := file.NewParquetWriter(sink, sc)
   961  
   962  	rgw := writer.AppendRowGroup()
   963  	cw, _ := rgw.NextColumn()
   964  	cw.(*file.ByteArrayColumnChunkWriter).WriteBatch(bigEndian, nil, nil)
   965  	cw.Close()
   966  	rgw.Close()
   967  	writer.Close()
   968  
   969  	rdr := ps.createReader(mem, sink.Bytes())
   970  	cr, err := rdr.GetColumn(context.TODO(), 0)
   971  	ps.NoError(err)
   972  
   973  	chunked, err := cr.NextBatch(smallSize)
   974  	ps.NoError(err)
   975  	defer chunked.Release()
   976  
   977  	ps.Len(chunked.Chunks(), 1)
   978  	ps.True(array.Equal(expected, chunked.Chunk(0)))
   979  }
   980  
   981  func (ps *ParquetIOTestSuite) TestReadDecimal256() {
   982  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   983  	defer mem.AssertSize(ps.T(), 0)
   984  
   985  	bigEndian := []parquet.ByteArray{
   986  		// 123456
   987  		[]byte{1, 226, 64},
   988  		// 987654
   989  		[]byte{15, 18, 6},
   990  		// -123456
   991  		[]byte{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 254, 29, 192},
   992  	}
   993  
   994  	bldr := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 40, Scale: 3})
   995  	defer bldr.Release()
   996  
   997  	bldr.Append(decimal256.FromU64(123456))
   998  	bldr.Append(decimal256.FromU64(987654))
   999  	bldr.Append(decimal256.FromI64(-123456))
  1000  
  1001  	expected := bldr.NewDecimal256Array()
  1002  	defer expected.Release()
  1003  
  1004  	sc := schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Required, schema.FieldList{
  1005  		schema.Must(schema.NewPrimitiveNodeLogical("decimals", parquet.Repetitions.Required, schema.NewDecimalLogicalType(40, 3), parquet.Types.ByteArray, -1, -1)),
  1006  	}, -1))
  1007  
  1008  	sink := encoding.NewBufferWriter(0, mem)
  1009  	defer sink.Release()
  1010  	writer := file.NewParquetWriter(sink, sc)
  1011  
  1012  	rgw := writer.AppendRowGroup()
  1013  	cw, _ := rgw.NextColumn()
  1014  	cw.(*file.ByteArrayColumnChunkWriter).WriteBatch(bigEndian, nil, nil)
  1015  	cw.Close()
  1016  	rgw.Close()
  1017  	writer.Close()
  1018  
  1019  	rdr := ps.createReader(mem, sink.Bytes())
  1020  	cr, err := rdr.GetColumn(context.TODO(), 0)
  1021  	ps.NoError(err)
  1022  
  1023  	chunked, err := cr.NextBatch(smallSize)
  1024  	ps.NoError(err)
  1025  	defer chunked.Release()
  1026  
  1027  	ps.Len(chunked.Chunks(), 1)
  1028  	ps.Truef(array.Equal(expected, chunked.Chunk(0)), "expected: %s\ngot: %s", expected, chunked.Chunk(0))
  1029  }
  1030  
  1031  func (ps *ParquetIOTestSuite) TestReadNestedStruct() {
  1032  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1033  	defer mem.AssertSize(ps.T(), 0)
  1034  
  1035  	dt := arrow.StructOf(arrow.Field{
  1036  		Name: "nested",
  1037  		Type: arrow.StructOf(
  1038  			arrow.Field{Name: "bool", Type: arrow.FixedWidthTypes.Boolean},
  1039  			arrow.Field{Name: "int32", Type: arrow.PrimitiveTypes.Int32},
  1040  			arrow.Field{Name: "int64", Type: arrow.PrimitiveTypes.Int64},
  1041  		),
  1042  	})
  1043  	field := arrow.Field{Name: "struct", Type: dt, Nullable: true}
  1044  
  1045  	builder := array.NewStructBuilder(mem, dt)
  1046  	defer builder.Release()
  1047  	nested := builder.FieldBuilder(0).(*array.StructBuilder)
  1048  
  1049  	builder.Append(true)
  1050  	nested.Append(true)
  1051  	nested.FieldBuilder(0).(*array.BooleanBuilder).Append(true)
  1052  	nested.FieldBuilder(1).(*array.Int32Builder).Append(int32(-1))
  1053  	nested.FieldBuilder(2).(*array.Int64Builder).Append(int64(-2))
  1054  	builder.AppendNull()
  1055  
  1056  	arr := builder.NewStructArray()
  1057  	defer arr.Release()
  1058  
  1059  	expected := array.NewTable(
  1060  		arrow.NewSchema([]arrow.Field{field}, nil),
  1061  		[]arrow.Column{*arrow.NewColumn(field, arrow.NewChunked(dt, []arrow.Array{arr}))},
  1062  		-1,
  1063  	)
  1064  	defer arr.Release() // NewChunked
  1065  	defer expected.Release()
  1066  	ps.roundTripTable(mem, expected, true)
  1067  }
  1068  
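         // writeColumn writes values as a single row group through pqarrow.FileWriter's WriteColumnData, with
         // dictionary encoding disabled, and returns the encoded file bytes.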
  1069  func (ps *ParquetIOTestSuite) writeColumn(mem memory.Allocator, sc *schema.GroupNode, values arrow.Array) []byte {
  1070  	var buf bytes.Buffer
  1071  	arrsc, err := pqarrow.FromParquet(schema.NewSchema(sc), nil, nil)
  1072  	ps.NoError(err)
  1073  
  1074  	writer, err := pqarrow.NewFileWriter(arrsc, &buf, parquet.NewWriterProperties(parquet.WithDictionaryDefault(false)), pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)))
  1075  	ps.NoError(err)
  1076  
  1077  	writer.NewRowGroup()
  1078  	ps.NoError(writer.WriteColumnData(values))
  1079  	//defer values.Release()
  1080  	ps.NoError(writer.Close())
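         	// Closing a second time is expected to be harmless and should not return an error.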
  1081  	ps.NoError(writer.Close())
  1082  
  1083  	return buf.Bytes()
  1084  }
  1085  
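         // readAndCheckSingleColumnFile reads column 0 from data and asserts it equals values.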
  1086  func (ps *ParquetIOTestSuite) readAndCheckSingleColumnFile(mem memory.Allocator, data []byte, values arrow.Array) {
  1087  	reader := ps.createReader(mem, data)
  1088  	cr, err := reader.GetColumn(context.TODO(), 0)
  1089  	ps.NoError(err)
  1090  	ps.NotNil(cr)
  1091  	defer cr.Release()
  1092  
  1093  	chunked, err := cr.NextBatch(smallSize)
  1094  	ps.NoError(err)
  1095  	defer chunked.Release()
  1096  
  1097  	ps.Len(chunked.Chunks(), 1)
  1098  	ps.NotNil(chunked.Chunk(0))
  1099  
  1100  	ps.True(array.Equal(values, chunked.Chunk(0)))
  1101  }
  1102  
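         // fullTypeList is the set of Arrow types exercised by the single-column and list round-trip tests.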
  1103  var fullTypeList = []arrow.DataType{
  1104  	arrow.FixedWidthTypes.Boolean,
  1105  	arrow.PrimitiveTypes.Uint8,
  1106  	arrow.PrimitiveTypes.Int8,
  1107  	arrow.PrimitiveTypes.Uint16,
  1108  	arrow.PrimitiveTypes.Int16,
  1109  	arrow.PrimitiveTypes.Uint32,
  1110  	arrow.PrimitiveTypes.Int32,
  1111  	arrow.PrimitiveTypes.Uint64,
  1112  	arrow.PrimitiveTypes.Int64,
  1113  	arrow.FixedWidthTypes.Date32,
  1114  	arrow.PrimitiveTypes.Float32,
  1115  	arrow.PrimitiveTypes.Float64,
  1116  	arrow.BinaryTypes.String,
  1117  	arrow.BinaryTypes.Binary,
  1118  	&arrow.FixedSizeBinaryType{ByteWidth: 10},
  1119  	&arrow.Decimal128Type{Precision: 1, Scale: 0},
  1120  	&arrow.Decimal128Type{Precision: 5, Scale: 4},
  1121  	&arrow.Decimal128Type{Precision: 10, Scale: 9},
  1122  	&arrow.Decimal128Type{Precision: 19, Scale: 18},
  1123  	&arrow.Decimal128Type{Precision: 23, Scale: 22},
  1124  	&arrow.Decimal128Type{Precision: 27, Scale: 26},
  1125  	&arrow.Decimal128Type{Precision: 38, Scale: 37},
  1126  }
  1127  
  1128  func (ps *ParquetIOTestSuite) TestSingleColumnRequiredWrite() {
  1129  	for _, dt := range fullTypeList {
  1130  		ps.Run(dt.Name(), func() {
  1131  			mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1132  			defer mem.AssertSize(ps.T(), 0)
  1133  
  1134  			values := testutils.RandomNonNull(mem, dt, smallSize)
  1135  			defer values.Release()
  1136  			sc := ps.makeSimpleSchema(dt, parquet.Repetitions.Required)
  1137  			data := ps.writeColumn(mem, sc, values)
  1138  			ps.readAndCheckSingleColumnFile(mem, data, values)
  1139  		})
  1140  	}
  1141  }
  1142  
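         // roundTripTable writes expected with pqarrow.WriteTable (optionally storing the Arrow schema in the
         // file metadata), reads it back, and asserts the first column is approximately equal chunk-for-chunk.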
  1143  func (ps *ParquetIOTestSuite) roundTripTable(mem memory.Allocator, expected arrow.Table, storeSchema bool) {
  1144  	var buf bytes.Buffer
  1145  	var props pqarrow.ArrowWriterProperties
  1146  	if storeSchema {
  1147  		props = pqarrow.NewArrowWriterProperties(pqarrow.WithStoreSchema(), pqarrow.WithAllocator(mem))
  1148  	} else {
  1149  		props = pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem))
  1150  	}
  1151  
  1152  	writeProps := parquet.NewWriterProperties(parquet.WithAllocator(mem))
  1153  	ps.Require().NoError(pqarrow.WriteTable(expected, &buf, expected.NumRows(), writeProps, props))
  1154  
  1155  	reader := ps.createReader(mem, buf.Bytes())
  1156  	defer reader.ParquetReader().Close()
  1157  
  1158  	tbl := ps.readTable(reader)
  1159  	defer tbl.Release()
  1160  
  1161  	ps.Equal(expected.NumCols(), tbl.NumCols())
  1162  	ps.Equal(expected.NumRows(), tbl.NumRows())
  1163  
  1164  	exChunk := expected.Column(0).Data()
  1165  	tblChunk := tbl.Column(0).Data()
  1166  
  1167  	ps.Equal(len(exChunk.Chunks()), len(tblChunk.Chunks()))
  1168  	exc := exChunk.Chunk(0)
  1169  	tbc := tblChunk.Chunk(0)
  1170  	ps.Truef(array.ApproxEqual(exc, tbc), "expected: %T %s\ngot: %T %s", exc, exc, tbc, tbc)
  1171  }
  1172  
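         // makeEmptyListsArray builds a list<float32> array of the given length in which every list is empty
         // (the offsets buffer is all zeros and the child array has no data).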
  1173  func makeEmptyListsArray(size int) arrow.Array {
  1174  	// allocate an offsets buffer with only zeros
  1175  	offsetsNbytes := arrow.Int32Traits.BytesRequired(size + 1)
  1176  	offsetsBuffer := make([]byte, offsetsNbytes)
  1177  
  1178  	childBuffers := []*memory.Buffer{nil, nil}
  1179  	childData := array.NewData(arrow.PrimitiveTypes.Float32, 0, childBuffers, nil, 0, 0)
  1180  	defer childData.Release()
  1181  	buffers := []*memory.Buffer{nil, memory.NewBufferBytes(offsetsBuffer)}
  1182  	arrayData := array.NewData(arrow.ListOf(childData.DataType()), size, buffers, []arrow.ArrayData{childData}, 0, 0)
  1183  	defer arrayData.Release()
  1184  	return array.MakeFromData(arrayData)
  1185  }
  1186  
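         // makeListArray distributes values across a list array of length size: even-indexed entries are marked
         // null until nullcount nulls have been produced, the list at index 1 is left empty, and the remaining
         // lists each receive an equal share of the values.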
  1187  func makeListArray(values arrow.Array, size, nullcount int) arrow.Array {
  1188  	nonNullEntries := size - nullcount - 1
  1189  	lengthPerEntry := values.Len() / nonNullEntries
  1190  
  1191  	offsets := make([]byte, arrow.Int32Traits.BytesRequired(size+1))
  1192  	offsetsArr := arrow.Int32Traits.CastFromBytes(offsets)
  1193  
  1194  	nullBitmap := make([]byte, int(bitutil.BytesForBits(int64(size))))
  1195  
  1196  	curOffset := 0
  1197  	for i := 0; i < size; i++ {
  1198  		offsetsArr[i] = int32(curOffset)
  1199  		if !(((i % 2) == 0) && ((i / 2) < nullcount)) {
  1200  			// non-null list (list with index 1 is always empty)
  1201  			bitutil.SetBit(nullBitmap, i)
  1202  			if i != 1 {
  1203  				curOffset += lengthPerEntry
  1204  			}
  1205  		}
  1206  	}
  1207  	offsetsArr[size] = int32(values.Len())
  1208  
  1209  	listData := array.NewData(arrow.ListOf(values.DataType()), size,
  1210  		[]*memory.Buffer{memory.NewBufferBytes(nullBitmap), memory.NewBufferBytes(offsets)},
  1211  		[]arrow.ArrayData{values.Data()}, nullcount, 0)
  1212  	defer listData.Release()
  1213  	return array.NewListData(listData)
  1214  }
  1215  
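         // prepareEmptyListsTable wraps an all-empty-lists column in a single nullable-column table.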
  1216  func prepareEmptyListsTable(size int) arrow.Table {
  1217  	lists := makeEmptyListsArray(size)
  1218  	defer lists.Release()
  1219  	chunked := arrow.NewChunked(lists.DataType(), []arrow.Array{lists})
  1220  	defer chunked.Release()
  1221  	return makeSimpleTable(chunked, true)
  1222  }
  1223  
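         // prepareListTable builds a single list<dt> column with the requested nullability of lists and elements;
         // the values come from a sliced array and the final column is a sliced chunked array, so non-zero
         // offsets are exercised by the round trip.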
  1224  func prepareListTable(dt arrow.DataType, size int, nullableLists bool, nullableElems bool, nullCount int) arrow.Table {
  1225  	nc := nullCount
  1226  	if !nullableElems {
  1227  		nc = 0
  1228  	}
  1229  	values := testutils.RandomNullable(dt, size*size, nc)
  1230  	defer values.Release()
  1231  	// also test that slice offsets are respected
  1232  	values = array.NewSlice(values, 5, int64(values.Len()))
  1233  	defer values.Release()
  1234  
  1235  	if !nullableLists {
  1236  		nullCount = 0
  1237  	}
  1238  	lists := makeListArray(values, size, nullCount)
  1239  	defer lists.Release()
  1240  
  1241  	chunked := arrow.NewChunked(lists.DataType(), []arrow.Array{lists})
  1242  	defer chunked.Release()
  1243  
  1244  	return makeSimpleTable(array.NewChunkedSlice(chunked, 3, int64(size)), nullableLists)
  1245  }
  1246  
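         // prepareListOfListTable builds a list<list<dt>> column, controlling null counts independently at the
         // outer-list, inner-list, and element levels.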
  1247  func prepareListOfListTable(dt arrow.DataType, size, nullCount int, nullableParentLists, nullableLists, nullableElems bool) arrow.Table {
  1248  	nc := nullCount
  1249  	if !nullableElems {
  1250  		nc = 0
  1251  	}
  1252  
  1253  	values := testutils.RandomNullable(dt, size*6, nc)
  1254  	defer values.Release()
  1255  
  1256  	if nullableLists {
  1257  		nc = nullCount
  1258  	} else {
  1259  		nc = 0
  1260  	}
  1261  
  1262  	lists := makeListArray(values, size*3, nc)
  1263  	defer lists.Release()
  1264  
  1265  	if !nullableParentLists {
  1266  		nullCount = 0
  1267  	}
  1268  
  1269  	parentLists := makeListArray(lists, size, nullCount)
  1270  	defer parentLists.Release()
  1271  
  1272  	chunked := arrow.NewChunked(parentLists.DataType(), []arrow.Array{parentLists})
  1273  	defer chunked.Release()
  1274  
  1275  	return makeSimpleTable(chunked, nullableParentLists)
  1276  }
  1277  
  1278  func (ps *ParquetIOTestSuite) TestSingleEmptyListsColumnReadWrite() {
  1279  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1280  	defer mem.AssertSize(ps.T(), 0)
  1281  
  1282  	expected := prepareEmptyListsTable(smallSize)
  1283  	defer expected.Release()
  1284  	buf := writeTableToBuffer(ps.T(), mem, expected, smallSize, pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)))
  1285  	defer buf.Release()
  1286  
  1287  	reader := ps.createReader(mem, buf.Bytes())
  1288  	tbl := ps.readTable(reader)
  1289  	defer tbl.Release()
  1290  
  1291  	ps.EqualValues(expected.NumCols(), tbl.NumCols())
  1292  	ps.EqualValues(expected.NumRows(), tbl.NumRows())
  1293  
  1294  	exChunk := expected.Column(0).Data()
  1295  	tblChunk := tbl.Column(0).Data()
  1296  
  1297  	ps.Equal(len(exChunk.Chunks()), len(tblChunk.Chunks()))
  1298  	ps.True(array.Equal(exChunk.Chunk(0), tblChunk.Chunk(0)))
  1299  }
  1300  
  1301  func (ps *ParquetIOTestSuite) TestSingleColumnOptionalReadWrite() {
  1302  	for _, dt := range fullTypeList {
  1303  		ps.Run(dt.Name(), func() {
  1304  			mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1305  			defer mem.AssertSize(ps.T(), 0)
  1306  
  1307  			values := testutils.RandomNullable(dt, smallSize, 10)
  1308  			defer values.Release()
  1309  			sc := ps.makeSimpleSchema(dt, parquet.Repetitions.Optional)
  1310  			data := ps.writeColumn(mem, sc, values)
  1311  			ps.readAndCheckSingleColumnFile(mem, data, values)
  1312  		})
  1313  	}
  1314  }
  1315  
  1316  func (ps *ParquetIOTestSuite) TestSingleNullableListNullableColumnReadWrite() {
  1317  	for _, dt := range fullTypeList {
  1318  		ps.Run(dt.Name(), func() {
  1319  			expected := prepareListTable(dt, smallSize, true, true, 10)
  1320  			defer expected.Release()
  1321  			ps.roundTripTable(memory.DefaultAllocator, expected, false)
  1322  		})
  1323  	}
  1324  }
  1325  
  1326  func (ps *ParquetIOTestSuite) TestSingleRequiredListNullableColumnReadWrite() {
  1327  	for _, dt := range fullTypeList {
  1328  		ps.Run(dt.Name(), func() {
  1329  			expected := prepareListTable(dt, smallSize, false, true, 10)
  1330  			defer expected.Release()
  1331  			ps.roundTripTable(memory.DefaultAllocator, expected, false)
  1332  		})
  1333  	}
  1334  }
  1335  
  1336  func (ps *ParquetIOTestSuite) TestSingleNullableListRequiredColumnReadWrite() {
  1337  	for _, dt := range fullTypeList {
  1338  		ps.Run(dt.Name(), func() {
  1339  			expected := prepareListTable(dt, smallSize, true, false, 10)
  1340  			defer expected.Release()
  1341  			ps.roundTripTable(memory.DefaultAllocator, expected, false)
  1342  		})
  1343  	}
  1344  }
  1345  
  1346  func (ps *ParquetIOTestSuite) TestSingleRequiredListRequiredColumnReadWrite() {
  1347  	for _, dt := range fullTypeList {
  1348  		ps.Run(dt.Name(), func() {
  1349  			expected := prepareListTable(dt, smallSize, false, false, 0)
  1350  			defer expected.Release()
  1351  			ps.roundTripTable(memory.DefaultAllocator, expected, false)
  1352  		})
  1353  	}
  1354  }
  1355  
  1356  func (ps *ParquetIOTestSuite) TestSingleNullableListRequiredListRequiredColumnReadWrite() {
  1357  	for _, dt := range fullTypeList {
  1358  		ps.Run(dt.Name(), func() {
  1359  			expected := prepareListOfListTable(dt, smallSize, 2, true, false, false)
  1360  			defer expected.Release()
  1361  			ps.roundTripTable(memory.DefaultAllocator, expected, false)
  1362  		})
  1363  	}
  1364  }
  1365  
  1366  func (ps *ParquetIOTestSuite) TestSimpleStruct() {
  1367  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1368  	defer mem.AssertSize(ps.T(), 0)
  1369  
  1370  	links := arrow.StructOf(arrow.Field{Name: "Backward", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
  1371  		arrow.Field{Name: "Forward", Type: arrow.PrimitiveTypes.Int64, Nullable: true})
  1372  
  1373  	bldr := array.NewStructBuilder(mem, links)
  1374  	defer bldr.Release()
  1375  
  1376  	backBldr := bldr.FieldBuilder(0).(*array.Int64Builder)
  1377  	forwardBldr := bldr.FieldBuilder(1).(*array.Int64Builder)
  1378  
  1379  	bldr.Append(true)
  1380  	backBldr.AppendNull()
  1381  	forwardBldr.Append(20)
  1382  
  1383  	bldr.Append(true)
  1384  	backBldr.Append(10)
  1385  	forwardBldr.Append(40)
  1386  
  1387  	data := bldr.NewArray()
  1388  	defer data.Release()
  1389  
  1390  	tbl := array.NewTable(arrow.NewSchema([]arrow.Field{{Name: "links", Type: links}}, nil),
  1391  		[]arrow.Column{*arrow.NewColumn(arrow.Field{Name: "links", Type: links}, arrow.NewChunked(links, []arrow.Array{data}))}, -1)
  1392  	defer data.Release() // NewChunked
  1393  	defer tbl.Release()
  1394  
  1395  	ps.roundTripTable(mem, tbl, false)
  1396  }
  1397  
  1398  func (ps *ParquetIOTestSuite) TestSingleColumnNullableStruct() {
  1399  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1400  	defer mem.AssertSize(ps.T(), 0)
  1401  
  1402  	links := arrow.StructOf(arrow.Field{Name: "Backward", Type: arrow.PrimitiveTypes.Int64, Nullable: true})
  1403  	bldr := array.NewStructBuilder(mem, links)
  1404  	defer bldr.Release()
  1405  
  1406  	backBldr := bldr.FieldBuilder(0).(*array.Int64Builder)
  1407  
  1408  	bldr.AppendNull()
  1409  	bldr.Append(true)
  1410  	backBldr.Append(10)
  1411  
  1412  	data := bldr.NewArray()
  1413  	defer data.Release()
  1414  
  1415  	tbl := array.NewTable(arrow.NewSchema([]arrow.Field{{Name: "links", Type: links, Nullable: true}}, nil),
  1416  		[]arrow.Column{*arrow.NewColumn(arrow.Field{Name: "links", Type: links, Nullable: true}, arrow.NewChunked(links, []arrow.Array{data}))}, -1)
  1417  	defer data.Release() // NewChunked
  1418  	defer tbl.Release()
  1419  
  1420  	ps.roundTripTable(mem, tbl, false)
  1421  }
  1422  
  1423  func (ps *ParquetIOTestSuite) TestNestedRequiredFieldStruct() {
  1424  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1425  	defer mem.AssertSize(ps.T(), 0)
  1426  
  1427  	intField := arrow.Field{Name: "int_array", Type: arrow.PrimitiveTypes.Int32}
  1428  	intBldr := array.NewInt32Builder(mem)
  1429  	defer intBldr.Release()
  1430  	intBldr.AppendValues([]int32{0, 1, 2, 3, 4, 5, 7, 8}, nil)
  1431  
  1432  	intArr := intBldr.NewArray()
  1433  	defer intArr.Release()
  1434  
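        	// Editor's note: 0xCC == 0b11001100. With Arrow's LSB-first validity bitmap,
        	// slots 2, 3, 6 and 7 are valid and slots 0, 1, 4 and 5 are null, matching the
        	// null count of 4 passed to array.NewData below.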
  1435  	validity := memory.NewBufferBytes([]byte{0xCC})
  1436  	defer validity.Release()
  1437  
  1438  	structField := arrow.Field{Name: "root", Type: arrow.StructOf(intField), Nullable: true}
  1439  	structData := array.NewData(structField.Type, 8, []*memory.Buffer{validity}, []arrow.ArrayData{intArr.Data()}, 4, 0)
  1440  	defer structData.Release()
  1441  	stData := array.NewStructData(structData)
  1442  	defer stData.Release()
  1443  
  1444  	tbl := array.NewTable(arrow.NewSchema([]arrow.Field{structField}, nil),
  1445  		[]arrow.Column{*arrow.NewColumn(structField,
  1446  			arrow.NewChunked(structField.Type, []arrow.Array{stData}))}, -1)
  1447  	defer stData.Release() // NewChunked
  1448  	defer tbl.Release()
  1449  
  1450  	ps.roundTripTable(mem, tbl, false)
  1451  }
  1452  
  1453  func (ps *ParquetIOTestSuite) TestNestedNullableField() {
  1454  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1455  	defer mem.AssertSize(ps.T(), 0)
  1456  
  1457  	intField := arrow.Field{Name: "int_array", Type: arrow.PrimitiveTypes.Int32, Nullable: true}
  1458  	intBldr := array.NewInt32Builder(mem)
  1459  	defer intBldr.Release()
  1460  	intBldr.AppendValues([]int32{0, 1, 2, 3, 4, 5, 7, 8}, []bool{true, false, true, false, true, true, false, true})
  1461  
  1462  	intArr := intBldr.NewArray()
  1463  	defer intArr.Release()
  1464  
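        	// Same 0xCC struct-level validity as in the previous test, combined here with
        	// element-level nulls in the child int32 array.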
  1465  	validity := memory.NewBufferBytes([]byte{0xCC})
  1466  	defer validity.Release()
  1467  
  1468  	structField := arrow.Field{Name: "root", Type: arrow.StructOf(intField), Nullable: true}
  1469  	data := array.NewData(structField.Type, 8, []*memory.Buffer{validity}, []arrow.ArrayData{intArr.Data()}, 4, 0)
  1470  	defer data.Release()
  1471  	stData := array.NewStructData(data)
  1472  	defer stData.Release()
  1473  
  1474  	tbl := array.NewTable(arrow.NewSchema([]arrow.Field{structField}, nil),
  1475  		[]arrow.Column{*arrow.NewColumn(structField,
  1476  			arrow.NewChunked(structField.Type, []arrow.Array{stData}))}, -1)
  1477  	defer stData.Release() // NewChunked
  1478  	defer tbl.Release()
  1479  
  1480  	ps.roundTripTable(mem, tbl, false)
  1481  }
  1482  
  1483  func (ps *ParquetIOTestSuite) TestNestedEmptyList() {
  1484  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1485  	defer mem.AssertSize(ps.T(), 0)
  1486  
  1487  	bldr := array.NewStructBuilder(mem, arrow.StructOf(
  1488  		arrow.Field{
  1489  			Name: "root",
  1490  			Type: arrow.StructOf(
  1491  				arrow.Field{
  1492  					Name: "child1",
  1493  					Type: arrow.ListOf(arrow.StructOf(
  1494  						arrow.Field{
  1495  							Name: "child2",
  1496  							Type: arrow.ListOf(arrow.StructOf(
  1497  								arrow.Field{
  1498  									Name: "name",
  1499  									Type: arrow.BinaryTypes.String,
  1500  								},
  1501  							)),
  1502  						},
  1503  					)),
  1504  				},
  1505  			),
  1506  		},
  1507  	))
  1508  	defer bldr.Release()
  1509  
  1510  	rootBldr := bldr.FieldBuilder(0).(*array.StructBuilder)
  1511  	child1Bldr := rootBldr.FieldBuilder(0).(*array.ListBuilder)
  1512  	child1ElBldr := child1Bldr.ValueBuilder().(*array.StructBuilder)
  1513  	child2Bldr := child1ElBldr.FieldBuilder(0).(*array.ListBuilder)
  1514  	leafBldr := child2Bldr.ValueBuilder().(*array.StructBuilder)
  1515  	nameBldr := leafBldr.FieldBuilder(0).(*array.StringBuilder)
  1516  
  1517  	// Append the following target structure 8 times:
  1518  	// {
  1519  	//   "root": {
  1520  	//     "child1": [
  1521  	//       { "child2": [{ "name": "foo" }] },
  1522  	//       { "child2": [] }
  1523  	//     ]
  1524  	//   }
  1525  	// }
  1526  
  1527  	for i := 0; i < 8; i++ {
  1528  		bldr.Append(true)
  1529  		rootBldr.Append(true)
  1530  		child1Bldr.Append(true)
  1531  
  1532  		child1ElBldr.Append(true)
  1533  		child2Bldr.Append(true)
  1534  		leafBldr.Append(true)
  1535  		nameBldr.Append("foo")
  1536  
  1537  		child1ElBldr.Append(true)
  1538  		child2Bldr.Append(true)
  1539  	}
  1540  
  1541  	arr := bldr.NewArray()
  1542  	defer arr.Release()
  1543  
  1544  	field := arrow.Field{Name: "x", Type: arr.DataType(), Nullable: true}
  1545  	expected := array.NewTableFromSlice(arrow.NewSchema([]arrow.Field{field}, nil), [][]arrow.Array{{arr}})
  1546  	defer expected.Release()
  1547  
  1548  	ps.roundTripTable(mem, expected, false)
  1549  }
  1550  
  1551  func (ps *ParquetIOTestSuite) TestCanonicalNestedRoundTrip() {
  1552  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1553  	defer mem.AssertSize(ps.T(), 0)
  1554  
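        	// This mirrors the nested "Document" schema from the Dremel paper
        	// (DocId, Links.Backward/Forward, Name.Language.Code/Country, Name.Url),
        	// the standard example used to illustrate Parquet's nested encoding.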
  1555  	docIdField := arrow.Field{Name: "DocID", Type: arrow.PrimitiveTypes.Int64}
  1556  	linksField := arrow.Field{Name: "Links", Type: arrow.StructOf(
  1557  		arrow.Field{Name: "Backward", Type: arrow.ListOf(arrow.PrimitiveTypes.Int64)},
  1558  		arrow.Field{Name: "Forward", Type: arrow.ListOf(arrow.PrimitiveTypes.Int64)},
  1559  	), Nullable: true}
  1560  
  1561  	nameStruct := arrow.StructOf(
  1562  		arrow.Field{Name: "Language", Nullable: true, Type: arrow.ListOf(
  1563  			arrow.StructOf(arrow.Field{Name: "Code", Type: arrow.BinaryTypes.String},
  1564  				arrow.Field{Name: "Country", Type: arrow.BinaryTypes.String, Nullable: true}))},
  1565  		arrow.Field{Name: "Url", Type: arrow.BinaryTypes.String, Nullable: true})
  1566  
  1567  	nameField := arrow.Field{Name: "Name", Type: arrow.ListOf(nameStruct)}
  1568  	sc := arrow.NewSchema([]arrow.Field{docIdField, linksField, nameField}, nil)
  1569  
  1570  	docIDArr, _, err := array.FromJSON(mem, docIdField.Type, strings.NewReader("[10, 20]"))
  1571  	ps.Require().NoError(err)
  1572  	defer docIDArr.Release()
  1573  
  1574  	linksIDArr, _, err := array.FromJSON(mem, linksField.Type, strings.NewReader(`[{"Backward":[], "Forward":[20, 40, 60]}, {"Backward":[10, 30], "Forward": [80]}]`))
  1575  	ps.Require().NoError(err)
  1576  	defer linksIDArr.Release()
  1577  
  1578  	nameArr, _, err := array.FromJSON(mem, nameField.Type, strings.NewReader(`
  1579  			[[{"Language": [{"Code": "en_us", "Country": "us"},
  1580  							{"Code": "en_us", "Country": null}],
  1581  			   "Url": "http://A"},
  1582  			  {"Url": "http://B", "Language": null},
  1583  			  {"Language": [{"Code": "en-gb", "Country": "gb"}], "Url": null}],
  1584  			  [{"Url": "http://C", "Language": null}]]`))
  1585  	ps.Require().NoError(err)
  1586  	defer nameArr.Release()
  1587  
  1588  	expected := array.NewTable(sc, []arrow.Column{
  1589  		*arrow.NewColumn(docIdField, arrow.NewChunked(docIdField.Type, []arrow.Array{docIDArr})),
  1590  		*arrow.NewColumn(linksField, arrow.NewChunked(linksField.Type, []arrow.Array{linksIDArr})),
  1591  		*arrow.NewColumn(nameField, arrow.NewChunked(nameField.Type, []arrow.Array{nameArr})),
  1592  	}, 2)
  1593  	defer docIDArr.Release()   // NewChunked
  1594  	defer linksIDArr.Release() // NewChunked
  1595  	defer nameArr.Release()    // NewChunked
  1596  	defer expected.Release()
  1597  
  1598  	ps.roundTripTable(mem, expected, false)
  1599  }
  1600  
  1601  func (ps *ParquetIOTestSuite) TestFixedSizeList() {
  1602  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1603  	defer mem.AssertSize(ps.T(), 0)
  1604  
  1605  	bldr := array.NewFixedSizeListBuilder(mem, 3, arrow.PrimitiveTypes.Int16)
  1606  	defer bldr.Release()
  1607  
  1608  	vb := bldr.ValueBuilder().(*array.Int16Builder)
  1609  
  1610  	bldr.AppendValues([]bool{true, true, true})
  1611  	vb.AppendValues([]int16{1, 2, 3, 4, 5, 6, 7, 8, 9}, nil)
  1612  
  1613  	data := bldr.NewArray()
  1614  	defer data.Release() // NewArray
  1615  
  1616  	field := arrow.Field{Name: "root", Type: data.DataType(), Nullable: true}
  1617  	cnk := arrow.NewChunked(field.Type, []arrow.Array{data})
  1618  	defer data.Release() // NewChunked
  1619  
  1620  	tbl := array.NewTable(arrow.NewSchema([]arrow.Field{field}, nil), []arrow.Column{*arrow.NewColumn(field, cnk)}, -1)
  1621  	defer cnk.Release() // NewColumn
  1622  	defer tbl.Release()
  1623  
  1624  	ps.roundTripTable(mem, tbl, true)
  1625  }
  1626  
  1627  func (ps *ParquetIOTestSuite) TestNull() {
  1628  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1629  	defer mem.AssertSize(ps.T(), 0)
  1630  
  1631  	bldr := array.NewNullBuilder(mem)
  1632  	defer bldr.Release()
  1633  
  1634  	bldr.AppendNull()
  1635  	bldr.AppendNull()
  1636  	bldr.AppendNull()
  1637  
  1638  	data := bldr.NewArray()
  1639  	defer data.Release()
  1640  
  1641  	field := arrow.Field{Name: "x", Type: data.DataType(), Nullable: true}
  1642  	expected := array.NewTable(
  1643  		arrow.NewSchema([]arrow.Field{field}, nil),
  1644  		[]arrow.Column{*arrow.NewColumn(field, arrow.NewChunked(field.Type, []arrow.Array{data}))},
  1645  		-1,
  1646  	)
        	defer data.Release() // NewChunked
        	defer expected.Release()
  1647  
  1648  	ps.roundTripTable(mem, expected, true)
  1649  }
  1650  
  1651  // ARROW-17169
  1652  func (ps *ParquetIOTestSuite) TestNullableListOfStruct() {
  1653  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1654  	defer mem.AssertSize(ps.T(), 0)
  1655  
  1656  	bldr := array.NewListBuilder(mem, arrow.StructOf(
  1657  		arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int32},
  1658  		arrow.Field{Name: "b", Type: arrow.BinaryTypes.String},
  1659  	))
  1660  	defer bldr.Release()
  1661  
  1662  	stBldr := bldr.ValueBuilder().(*array.StructBuilder)
  1663  	aBldr := stBldr.FieldBuilder(0).(*array.Int32Builder)
  1664  	bBldr := stBldr.FieldBuilder(1).(*array.StringBuilder)
  1665  
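        	// Every 5th list entry is appended as null; the others each hold four structs.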
  1666  	for i := 0; i < 320; i++ {
  1667  		if i%5 == 0 {
  1668  			bldr.AppendNull()
  1669  			continue
  1670  		}
  1671  		bldr.Append(true)
  1672  		for j := 0; j < 4; j++ {
  1673  			stBldr.Append(true)
  1674  			aBldr.Append(int32(i + j))
  1675  			bBldr.Append(strconv.Itoa(i + j))
  1676  		}
  1677  	}
  1678  
  1679  	arr := bldr.NewArray()
  1680  	defer arr.Release()
  1681  
  1682  	field := arrow.Field{Name: "x", Type: arr.DataType(), Nullable: true}
  1683  	expected := array.NewTable(arrow.NewSchema([]arrow.Field{field}, nil),
  1684  		[]arrow.Column{*arrow.NewColumn(field, arrow.NewChunked(field.Type, []arrow.Array{arr}))}, -1)
  1685  	defer arr.Release() // NewChunked
  1686  	defer expected.Release()
  1687  
  1688  	ps.roundTripTable(mem, expected, false)
  1689  }
  1690  
  1691  func (ps *ParquetIOTestSuite) TestStructWithListOfNestedStructs() {
  1692  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1693  	defer mem.AssertSize(ps.T(), 0)
  1694  
  1695  	bldr := array.NewStructBuilder(mem, arrow.StructOf(
  1696  		arrow.Field{
  1697  			Nullable: true,
  1698  			Name:     "l",
  1699  			Type: arrow.ListOf(arrow.StructOf(
  1700  				arrow.Field{
  1701  					Nullable: true,
  1702  					Name:     "a",
  1703  					Type: arrow.StructOf(
  1704  						arrow.Field{
  1705  							Nullable: true,
  1706  							Name:     "b",
  1707  							Type:     arrow.BinaryTypes.String,
  1708  						},
  1709  					),
  1710  				},
  1711  			)),
  1712  		},
  1713  	))
  1714  	defer bldr.Release()
  1715  
  1716  	lBldr := bldr.FieldBuilder(0).(*array.ListBuilder)
  1717  	stBldr := lBldr.ValueBuilder().(*array.StructBuilder)
  1718  	aBldr := stBldr.FieldBuilder(0).(*array.StructBuilder)
  1719  	bBldr := aBldr.FieldBuilder(0).(*array.StringBuilder)
  1720  
  1721  	bldr.AppendNull()
  1722  	bldr.Append(true)
  1723  	lBldr.Append(true)
  1724  	for i := 0; i < 8; i++ {
  1725  		stBldr.Append(true)
  1726  		aBldr.Append(true)
  1727  		bBldr.Append(strconv.Itoa(i))
  1728  	}
  1729  
  1730  	arr := bldr.NewArray()
  1731  	defer arr.Release()
  1732  
  1733  	field := arrow.Field{Name: "x", Type: arr.DataType(), Nullable: true}
  1734  	expected := array.NewTable(arrow.NewSchema([]arrow.Field{field}, nil),
  1735  		[]arrow.Column{*arrow.NewColumn(field, arrow.NewChunked(field.Type, []arrow.Array{arr}))}, -1)
  1736  	defer arr.Release() // NewChunked
  1737  	defer expected.Release()
  1738  
  1739  	ps.roundTripTable(mem, expected, false)
  1740  }
  1741  
  1742  func TestParquetArrowIO(t *testing.T) {
  1743  	suite.Run(t, new(ParquetIOTestSuite))
  1744  }
  1745  
  1746  func TestBufferedRecWrite(t *testing.T) {
  1747  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1748  	defer mem.AssertSize(t, 0)
  1749  
  1750  	sc := arrow.NewSchema([]arrow.Field{
  1751  		{Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true},
  1752  		{Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
  1753  		{Name: "struct_i64_f64", Type: arrow.StructOf(
  1754  			arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
  1755  			arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true})},
  1756  	}, nil)
  1757  
  1758  	structData := array.NewData(sc.Field(2).Type, SIZELEN,
  1759  		[]*memory.Buffer{nil, nil},
  1760  		[]arrow.ArrayData{testutils.RandomNullable(arrow.PrimitiveTypes.Int64, SIZELEN, 0).Data(), testutils.RandomNullable(arrow.PrimitiveTypes.Float64, SIZELEN, 0).Data()}, 0, 0)
  1761  	defer structData.Release()
  1762  	cols := []arrow.Array{
  1763  		testutils.RandomNullable(sc.Field(0).Type, SIZELEN, SIZELEN/5),
  1764  		testutils.RandomNullable(sc.Field(1).Type, SIZELEN, SIZELEN/5),
  1765  		array.NewStructData(structData),
  1766  	}
  1767  
  1768  	rec := array.NewRecord(sc, cols, SIZELEN)
  1769  	defer rec.Release()
  1770  
  1771  	var (
  1772  		buf bytes.Buffer
  1773  	)
  1774  
  1775  	wr, err := pqarrow.NewFileWriter(sc, &buf,
  1776  		parquet.NewWriterProperties(parquet.WithCompression(compress.Codecs.Snappy), parquet.WithDictionaryDefault(false), parquet.WithDataPageSize(100*1024)),
  1777  		pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)))
  1778  	require.NoError(t, err)
  1779  
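        	// Write the record in two buffered halves; both are expected to land in a
        	// single row group, which the assertions below verify.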
  1780  	p1 := rec.NewSlice(0, SIZELEN/2)
  1781  	defer p1.Release()
  1782  	require.NoError(t, wr.WriteBuffered(p1))
  1783  
  1784  	p2 := rec.NewSlice(SIZELEN/2, SIZELEN)
  1785  	defer p2.Release()
  1786  	require.NoError(t, wr.WriteBuffered(p2))
  1787  
  1788  	require.NoError(t, wr.Close())
  1789  
  1790  	rdr, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
  1791  	assert.NoError(t, err)
  1792  
  1793  	assert.EqualValues(t, 1, rdr.NumRowGroups())
  1794  	assert.EqualValues(t, SIZELEN, rdr.NumRows())
  1795  	assert.NoError(t, rdr.Close())
  1796  
  1797  	tbl, err := pqarrow.ReadTable(context.Background(), bytes.NewReader(buf.Bytes()), nil, pqarrow.ArrowReadProperties{}, nil)
  1798  	assert.NoError(t, err)
  1799  	defer tbl.Release()
  1800  
  1801  	assert.EqualValues(t, SIZELEN, tbl.NumRows())
  1802  }
  1803  
  1804  func (ps *ParquetIOTestSuite) TestArrowMapTypeRoundTrip() {
  1805  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1806  	defer mem.AssertSize(ps.T(), 0)
  1807  
  1808  	bldr := array.NewMapBuilder(mem, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false)
  1809  	defer bldr.Release()
  1810  
  1811  	kb := bldr.KeyBuilder().(*array.StringBuilder)
  1812  	ib := bldr.ItemBuilder().(*array.Int32Builder)
  1813  
  1814  	bldr.Append(true)
  1815  	kb.AppendValues([]string{"Fee", "Fi", "Fo", "Fum"}, nil)
  1816  	ib.AppendValues([]int32{1, 2, 3, 4}, nil)
  1817  
  1818  	bldr.Append(true)
  1819  	kb.AppendValues([]string{"Fee", "Fi", "Fo"}, nil)
  1820  	ib.AppendValues([]int32{5, 4, 3}, nil)
  1821  
  1822  	bldr.AppendNull()
  1823  
  1824  	bldr.Append(true)
  1825  	kb.AppendValues([]string{"Fo", "Fi", "Fee"}, nil)
  1826  	ib.AppendValues([]int32{-1, 2, 3}, []bool{false, true, true})
  1827  
  1828  	arr := bldr.NewArray()
  1829  	defer arr.Release()
  1830  
  1831  	fld := arrow.Field{Name: "mapped", Type: arr.DataType(), Nullable: true}
  1832  	cnk := arrow.NewChunked(arr.DataType(), []arrow.Array{arr})
  1833  	defer arr.Release() // NewChunked
  1834  	tbl := array.NewTable(arrow.NewSchema([]arrow.Field{fld}, nil), []arrow.Column{*arrow.NewColumn(fld, cnk)}, -1)
  1835  	defer cnk.Release() // NewColumn
  1836  	defer tbl.Release()
  1837  
  1838  	ps.roundTripTable(mem, tbl, true)
  1839  }
  1840  
  1841  func (ps *ParquetIOTestSuite) TestArrowExtensionTypeRoundTrip() {
  1842  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1843  	defer mem.AssertSize(ps.T(), 0)
  1844  
  1845  	extBuilder := array.NewExtensionBuilder(mem, types.NewUUIDType())
  1846  	defer extBuilder.Release()
  1847  	builder := types.NewUUIDBuilder(extBuilder)
  1848  	builder.Append(uuid.New())
  1849  	arr := builder.NewArray()
  1850  	defer arr.Release()
  1851  
  1852  	fld := arrow.Field{Name: "uuid", Type: arr.DataType(), Nullable: true}
  1853  	cnk := arrow.NewChunked(arr.DataType(), []arrow.Array{arr})
  1854  	defer arr.Release() // NewChunked
  1855  	tbl := array.NewTable(arrow.NewSchema([]arrow.Field{fld}, nil), []arrow.Column{*arrow.NewColumn(fld, cnk)}, -1)
  1856  	defer cnk.Release() // NewColumn
  1857  	defer tbl.Release()
  1858  
  1859  	ps.roundTripTable(mem, tbl, true)
  1860  }
  1861  
  1862  func (ps *ParquetIOTestSuite) TestArrowUnknownExtensionTypeRoundTrip() {
  1863  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1864  	defer mem.AssertSize(ps.T(), 0)
  1865  
  1866  	var written, expected arrow.Table
  1867  
  1868  	{
  1869  		// Prepare `written` table with the extension type registered.
  1870  		extType := types.NewUUIDType()
  1871  		bldr := array.NewExtensionBuilder(mem, extType)
  1872  		defer bldr.Release()
  1873  
  1874  		bldr.Builder.(*array.FixedSizeBinaryBuilder).AppendValues(
  1875  			[][]byte{nil, []byte("abcdefghijklmno0"), []byte("abcdefghijklmno1"), []byte("abcdefghijklmno2")},
  1876  			[]bool{false, true, true, true})
  1877  
  1878  		arr := bldr.NewArray()
  1879  		defer arr.Release()
  1880  
  1881  		if arrow.GetExtensionType("uuid") != nil {
  1882  			ps.NoError(arrow.UnregisterExtensionType("uuid"))
  1883  		}
  1884  
  1885  		fld := arrow.Field{Name: "uuid", Type: arr.DataType(), Nullable: true}
  1886  		cnk := arrow.NewChunked(arr.DataType(), []arrow.Array{arr})
  1887  		defer arr.Release() // NewChunked
  1888  		written = array.NewTable(arrow.NewSchema([]arrow.Field{fld}, nil), []arrow.Column{*arrow.NewColumn(fld, cnk)}, -1)
  1889  		defer cnk.Release() // NewColumn
  1890  		defer written.Release()
  1891  	}
  1892  
  1893  	{
  1894  		// Prepare the `expected` table using the underlying storage type, since the extension type is now unregistered.
  1895  		bldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 16})
  1896  		defer bldr.Release()
  1897  		bldr.AppendValues(
  1898  			[][]byte{nil, []byte("abcdefghijklmno0"), []byte("abcdefghijklmno1"), []byte("abcdefghijklmno2")},
  1899  			[]bool{false, true, true, true})
  1900  
  1901  		arr := bldr.NewArray()
  1902  		defer arr.Release()
  1903  
  1904  		fld := arrow.Field{Name: "uuid", Type: arr.DataType(), Nullable: true}
  1905  		cnk := arrow.NewChunked(arr.DataType(), []arrow.Array{arr})
  1906  		defer arr.Release() // NewChunked
  1907  		expected = array.NewTable(arrow.NewSchema([]arrow.Field{fld}, nil), []arrow.Column{*arrow.NewColumn(fld, cnk)}, -1)
  1908  		defer cnk.Release() // NewColumn
  1909  		defer expected.Release()
  1910  	}
  1911  
  1912  	// sanity check before going deeper
  1913  	ps.Equal(expected.NumCols(), written.NumCols())
  1914  	ps.Equal(expected.NumRows(), written.NumRows())
  1915  
  1916  	// same flow as roundTripTable(), but with distinct written and expected tables
  1917  	var buf bytes.Buffer
  1918  	props := pqarrow.NewArrowWriterProperties(pqarrow.WithStoreSchema(), pqarrow.WithAllocator(mem))
  1919  
  1920  	writeProps := parquet.NewWriterProperties(parquet.WithAllocator(mem))
  1921  	ps.Require().NoError(pqarrow.WriteTable(written, &buf, written.NumRows(), writeProps, props))
  1922  
  1923  	reader := ps.createReader(mem, buf.Bytes())
  1924  	defer reader.ParquetReader().Close()
  1925  
  1926  	tbl := ps.readTable(reader)
  1927  	defer tbl.Release()
  1928  
  1929  	ps.Equal(expected.NumCols(), tbl.NumCols())
  1930  	ps.Equal(expected.NumRows(), tbl.NumRows())
  1931  
  1932  	exChunk := expected.Column(0).Data()
  1933  	tblChunk := tbl.Column(0).Data()
  1934  
  1935  	ps.Equal(len(exChunk.Chunks()), len(tblChunk.Chunks()))
  1936  	exc := exChunk.Chunk(0)
  1937  	tbc := tblChunk.Chunk(0)
  1938  	ps.Truef(array.Equal(exc, tbc), "expected: %T %s\ngot: %T %s", exc, exc, tbc, tbc)
  1939  
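        	// Because the Arrow schema was stored in the Parquet metadata (WithStoreSchema)
        	// but the extension type is unregistered at read time, the column comes back as
        	// its storage type while the extension name and serialized metadata survive in
        	// the field's metadata.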
  1940  	expectedMd := arrow.MetadataFrom(map[string]string{
  1941  		ipc.ExtensionTypeKeyName:     "uuid",
  1942  		ipc.ExtensionMetadataKeyName: "uuid-serialized",
  1943  		"PARQUET:field_id":           "-1",
  1944  	})
  1945  	ps.Truef(expectedMd.Equal(tbl.Column(0).Field().Metadata), "expected: %v\ngot: %v", expectedMd, tbl.Column(0).Field().Metadata)
  1946  }
  1947  
  1948  func TestWriteTableMemoryAllocation(t *testing.T) {
  1949  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
  1950  	sc := arrow.NewSchema([]arrow.Field{
  1951  		{Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true},
  1952  		{Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
  1953  		{Name: "struct_i64_f64", Type: arrow.StructOf(
  1954  			arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
  1955  			arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true})},
  1956  		{Name: "arr_i64", Type: arrow.ListOf(arrow.PrimitiveTypes.Int64)},
  1957  		{Name: "uuid", Type: types.NewUUIDType(), Nullable: true},
  1958  	}, nil)
  1959  
  1960  	bld := array.NewRecordBuilder(mem, sc)
  1961  	bld.Field(0).(*array.Float32Builder).Append(1.0)
  1962  	bld.Field(1).(*array.Int32Builder).Append(1)
  1963  	sbld := bld.Field(2).(*array.StructBuilder)
  1964  	sbld.Append(true)
  1965  	sbld.FieldBuilder(0).(*array.Int64Builder).Append(1)
  1966  	sbld.FieldBuilder(1).(*array.Float64Builder).Append(1.0)
  1967  	abld := bld.Field(3).(*array.ListBuilder)
  1968  	abld.Append(true)
  1969  	abld.ValueBuilder().(*array.Int64Builder).Append(2)
  1970  	bld.Field(4).(*types.UUIDBuilder).Append(uuid.MustParse("00000000-0000-0000-0000-000000000001"))
  1971  
  1972  	rec := bld.NewRecord()
  1973  	bld.Release()
  1974  
  1975  	var buf bytes.Buffer
  1976  	wr, err := pqarrow.NewFileWriter(sc, &buf,
  1977  		parquet.NewWriterProperties(parquet.WithCompression(compress.Codecs.Snappy)),
  1978  		pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)))
  1979  	require.NoError(t, err)
  1980  
  1981  	require.NoError(t, wr.Write(rec))
  1982  	rec.Release()
  1983  	require.NoError(t, wr.Close())
  1984  
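        	// With the record released and the writer closed, every buffer obtained from
        	// the checked allocator during the write should have been returned.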
  1985  	require.Zero(t, mem.CurrentAlloc())
  1986  }