github.com/apache/arrow/go/v7@v7.0.1/parquet/pqarrow/file_reader_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package pqarrow_test
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"io"
    23  	"os"
    24  	"path/filepath"
    25  	"testing"
    26  
    27  	"github.com/apache/arrow/go/v7/arrow"
    28  	"github.com/apache/arrow/go/v7/arrow/array"
    29  	"github.com/apache/arrow/go/v7/arrow/decimal128"
    30  	"github.com/apache/arrow/go/v7/arrow/memory"
    31  	"github.com/apache/arrow/go/v7/parquet/file"
    32  	"github.com/apache/arrow/go/v7/parquet/pqarrow"
    33  	"github.com/stretchr/testify/assert"
    34  	"github.com/stretchr/testify/require"
    35  )
    36  
    37  func getDataDir() string {
    38  	datadir := os.Getenv("PARQUET_TEST_DATA")
    39  	if datadir == "" {
    40  		panic("please point PARQUET_TEST_DATA env var to the test data directory")
    41  	}
    42  	return datadir
    43  }
    44  
    45  func TestArrowReaderAdHocReadDecimals(t *testing.T) {
    46  	tests := []struct {
    47  		file string
    48  		typ  *arrow.Decimal128Type
    49  	}{
    50  		{"int32_decimal", &arrow.Decimal128Type{Precision: 4, Scale: 2}},
    51  		{"int64_decimal", &arrow.Decimal128Type{Precision: 10, Scale: 2}},
    52  		{"fixed_length_decimal", &arrow.Decimal128Type{Precision: 25, Scale: 2}},
    53  		{"fixed_length_decimal_legacy", &arrow.Decimal128Type{Precision: 13, Scale: 2}},
    54  		{"byte_array_decimal", &arrow.Decimal128Type{Precision: 4, Scale: 2}},
    55  	}
    56  
    57  	dataDir := getDataDir()
    58  	for _, tt := range tests {
    59  		t.Run(tt.file, func(t *testing.T) {
    60  			mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
    61  			defer mem.AssertSize(t, 0)
    62  
    63  			filename := filepath.Join(dataDir, tt.file+".parquet")
    64  			require.FileExists(t, filename)
    65  
    66  			rdr, err := file.OpenParquetFile(filename, false)
    67  			require.NoError(t, err)
    68  			defer rdr.Close()
    69  			arrowRdr, err := pqarrow.NewFileReader(rdr, pqarrow.ArrowReadProperties{}, mem)
    70  			require.NoError(t, err)
    71  
    72  			tbl, err := arrowRdr.ReadTable(context.Background())
    73  			require.NoError(t, err)
    74  			defer tbl.Release()
    75  
    76  			assert.EqualValues(t, 1, tbl.NumCols())
    77  			assert.Truef(t, arrow.TypeEqual(tbl.Schema().Field(0).Type, tt.typ), "expected: %s\ngot: %s", tbl.Schema().Field(0).Type, tt.typ)
    78  
    79  			const expectedLen = 24
    80  			valCol := tbl.Column(0)
    81  
    82  			assert.EqualValues(t, expectedLen, valCol.Len())
    83  			assert.Len(t, valCol.Data().Chunks(), 1)
    84  
    85  			chunk := valCol.Data().Chunk(0)
    86  			bldr := array.NewDecimal128Builder(mem, tt.typ)
    87  			defer bldr.Release()
    88  			for i := 0; i < expectedLen; i++ {
    89  				bldr.Append(decimal128.FromI64(int64((i + 1) * 100)))
    90  			}
    91  
    92  			expectedArr := bldr.NewDecimal128Array()
    93  			defer expectedArr.Release()
    94  
    95  			assert.Truef(t, array.ArrayEqual(expectedArr, chunk), "expected: %s\ngot: %s", expectedArr, chunk)
    96  		})
    97  	}
    98  }
    99  
   100  func TestRecordReaderParallel(t *testing.T) {
   101  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   102  	defer mem.AssertSize(t, 0)
   103  
   104  	tbl := makeDateTimeTypesTable(mem, true, true)
   105  	defer tbl.Release()
   106  
   107  	var buf bytes.Buffer
   108  	require.NoError(t, pqarrow.WriteTable(tbl, &buf, tbl.NumRows(), nil, pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem))))
   109  
   110  	pf, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
   111  	require.NoError(t, err)
   112  
   113  	reader, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{BatchSize: 3, Parallel: true}, mem)
   114  	require.NoError(t, err)
   115  
   116  	sc, err := reader.Schema()
   117  	assert.NoError(t, err)
   118  	assert.Truef(t, tbl.Schema().Equal(sc), "expected: %s\ngot: %s", tbl.Schema(), sc)
   119  
   120  	rr, err := reader.GetRecordReader(context.Background(), nil, nil)
   121  	assert.NoError(t, err)
   122  	assert.NotNil(t, rr)
   123  	defer rr.Release()
   124  
   125  	records := make([]arrow.Record, 0)
   126  	for rr.Next() {
   127  		rec := rr.Record()
   128  		defer rec.Release()
   129  
   130  		assert.Truef(t, sc.Equal(rec.Schema()), "expected: %s\ngot: %s", sc, rec.Schema())
   131  		rec.Retain()
   132  		records = append(records, rec)
   133  	}
   134  
   135  	assert.False(t, rr.Next())
   136  
   137  	tr := array.NewTableReader(tbl, 3)
   138  	defer tr.Release()
   139  
   140  	assert.True(t, tr.Next())
   141  	assert.Truef(t, array.RecordEqual(tr.Record(), records[0]), "expected: %s\ngot: %s", tr.Record(), records[0])
   142  	assert.True(t, tr.Next())
   143  	assert.Truef(t, array.RecordEqual(tr.Record(), records[1]), "expected: %s\ngot: %s", tr.Record(), records[1])
   144  }
   145  
   146  func TestRecordReaderSerial(t *testing.T) {
   147  	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
   148  	defer mem.AssertSize(t, 0)
   149  
   150  	tbl := makeDateTimeTypesTable(mem, true, true)
   151  	defer tbl.Release()
   152  
   153  	var buf bytes.Buffer
   154  	require.NoError(t, pqarrow.WriteTable(tbl, &buf, tbl.NumRows(), nil, pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem))))
   155  
   156  	pf, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
   157  	require.NoError(t, err)
   158  
   159  	reader, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{BatchSize: 2}, mem)
   160  	require.NoError(t, err)
   161  
   162  	sc, err := reader.Schema()
   163  	assert.NoError(t, err)
   164  	assert.Truef(t, tbl.Schema().Equal(sc), "expected: %s\ngot: %s", tbl.Schema(), sc)
   165  
   166  	rr, err := reader.GetRecordReader(context.Background(), nil, nil)
   167  	assert.NoError(t, err)
   168  	assert.NotNil(t, rr)
   169  	defer rr.Release()
   170  
   171  	tr := array.NewTableReader(tbl, 2)
   172  	defer tr.Release()
   173  
   174  	rec, err := rr.Read()
   175  	assert.NoError(t, err)
   176  	tr.Next()
   177  	assert.Truef(t, array.RecordEqual(tr.Record(), rec), "expected: %s\ngot: %s", tr.Record(), rec)
   178  
   179  	rec, err = rr.Read()
   180  	assert.NoError(t, err)
   181  	tr.Next()
   182  	assert.Truef(t, array.RecordEqual(tr.Record(), rec), "expected: %s\ngot: %s", tr.Record(), rec)
   183  
   184  	rec, err = rr.Read()
   185  	assert.NoError(t, err)
   186  	tr.Next()
   187  	assert.Truef(t, array.RecordEqual(tr.Record(), rec), "expected: %s\ngot: %s", tr.Record(), rec)
   188  
   189  	rec, err = rr.Read()
   190  	assert.Same(t, io.EOF, err)
   191  	assert.Nil(t, rec)
   192  }