github.com/apache/arrow/go/v7@v7.0.1/parquet/pqarrow/file_reader_test.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package pqarrow_test 18 19 import ( 20 "bytes" 21 "context" 22 "io" 23 "os" 24 "path/filepath" 25 "testing" 26 27 "github.com/apache/arrow/go/v7/arrow" 28 "github.com/apache/arrow/go/v7/arrow/array" 29 "github.com/apache/arrow/go/v7/arrow/decimal128" 30 "github.com/apache/arrow/go/v7/arrow/memory" 31 "github.com/apache/arrow/go/v7/parquet/file" 32 "github.com/apache/arrow/go/v7/parquet/pqarrow" 33 "github.com/stretchr/testify/assert" 34 "github.com/stretchr/testify/require" 35 ) 36 37 func getDataDir() string { 38 datadir := os.Getenv("PARQUET_TEST_DATA") 39 if datadir == "" { 40 panic("please point PARQUET_TEST_DATA env var to the test data directory") 41 } 42 return datadir 43 } 44 45 func TestArrowReaderAdHocReadDecimals(t *testing.T) { 46 tests := []struct { 47 file string 48 typ *arrow.Decimal128Type 49 }{ 50 {"int32_decimal", &arrow.Decimal128Type{Precision: 4, Scale: 2}}, 51 {"int64_decimal", &arrow.Decimal128Type{Precision: 10, Scale: 2}}, 52 {"fixed_length_decimal", &arrow.Decimal128Type{Precision: 25, Scale: 2}}, 53 {"fixed_length_decimal_legacy", &arrow.Decimal128Type{Precision: 13, Scale: 2}}, 54 {"byte_array_decimal", &arrow.Decimal128Type{Precision: 4, Scale: 2}}, 55 } 56 57 dataDir := getDataDir() 58 for _, tt := range tests { 59 t.Run(tt.file, func(t *testing.T) { 60 mem := memory.NewCheckedAllocator(memory.DefaultAllocator) 61 defer mem.AssertSize(t, 0) 62 63 filename := filepath.Join(dataDir, tt.file+".parquet") 64 require.FileExists(t, filename) 65 66 rdr, err := file.OpenParquetFile(filename, false) 67 require.NoError(t, err) 68 defer rdr.Close() 69 arrowRdr, err := pqarrow.NewFileReader(rdr, pqarrow.ArrowReadProperties{}, mem) 70 require.NoError(t, err) 71 72 tbl, err := arrowRdr.ReadTable(context.Background()) 73 require.NoError(t, err) 74 defer tbl.Release() 75 76 assert.EqualValues(t, 1, tbl.NumCols()) 77 assert.Truef(t, arrow.TypeEqual(tbl.Schema().Field(0).Type, tt.typ), "expected: %s\ngot: %s", tbl.Schema().Field(0).Type, tt.typ) 78 79 const expectedLen = 24 80 valCol := tbl.Column(0) 81 82 assert.EqualValues(t, expectedLen, valCol.Len()) 83 assert.Len(t, valCol.Data().Chunks(), 1) 84 85 chunk := valCol.Data().Chunk(0) 86 bldr := array.NewDecimal128Builder(mem, tt.typ) 87 defer bldr.Release() 88 for i := 0; i < expectedLen; i++ { 89 bldr.Append(decimal128.FromI64(int64((i + 1) * 100))) 90 } 91 92 expectedArr := bldr.NewDecimal128Array() 93 defer expectedArr.Release() 94 95 assert.Truef(t, array.ArrayEqual(expectedArr, chunk), "expected: %s\ngot: %s", expectedArr, chunk) 96 }) 97 } 98 } 99 100 func TestRecordReaderParallel(t *testing.T) { 101 mem := memory.NewCheckedAllocator(memory.DefaultAllocator) 102 defer mem.AssertSize(t, 0) 103 104 tbl := makeDateTimeTypesTable(mem, true, true) 105 defer tbl.Release() 106 107 var buf bytes.Buffer 108 require.NoError(t, pqarrow.WriteTable(tbl, &buf, tbl.NumRows(), nil, pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)))) 109 110 pf, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) 111 require.NoError(t, err) 112 113 reader, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{BatchSize: 3, Parallel: true}, mem) 114 require.NoError(t, err) 115 116 sc, err := reader.Schema() 117 assert.NoError(t, err) 118 assert.Truef(t, tbl.Schema().Equal(sc), "expected: %s\ngot: %s", tbl.Schema(), sc) 119 120 rr, err := reader.GetRecordReader(context.Background(), nil, nil) 121 assert.NoError(t, err) 122 assert.NotNil(t, rr) 123 defer rr.Release() 124 125 records := make([]arrow.Record, 0) 126 for rr.Next() { 127 rec := rr.Record() 128 defer rec.Release() 129 130 assert.Truef(t, sc.Equal(rec.Schema()), "expected: %s\ngot: %s", sc, rec.Schema()) 131 rec.Retain() 132 records = append(records, rec) 133 } 134 135 assert.False(t, rr.Next()) 136 137 tr := array.NewTableReader(tbl, 3) 138 defer tr.Release() 139 140 assert.True(t, tr.Next()) 141 assert.Truef(t, array.RecordEqual(tr.Record(), records[0]), "expected: %s\ngot: %s", tr.Record(), records[0]) 142 assert.True(t, tr.Next()) 143 assert.Truef(t, array.RecordEqual(tr.Record(), records[1]), "expected: %s\ngot: %s", tr.Record(), records[1]) 144 } 145 146 func TestRecordReaderSerial(t *testing.T) { 147 mem := memory.NewCheckedAllocator(memory.DefaultAllocator) 148 defer mem.AssertSize(t, 0) 149 150 tbl := makeDateTimeTypesTable(mem, true, true) 151 defer tbl.Release() 152 153 var buf bytes.Buffer 154 require.NoError(t, pqarrow.WriteTable(tbl, &buf, tbl.NumRows(), nil, pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem)))) 155 156 pf, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) 157 require.NoError(t, err) 158 159 reader, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{BatchSize: 2}, mem) 160 require.NoError(t, err) 161 162 sc, err := reader.Schema() 163 assert.NoError(t, err) 164 assert.Truef(t, tbl.Schema().Equal(sc), "expected: %s\ngot: %s", tbl.Schema(), sc) 165 166 rr, err := reader.GetRecordReader(context.Background(), nil, nil) 167 assert.NoError(t, err) 168 assert.NotNil(t, rr) 169 defer rr.Release() 170 171 tr := array.NewTableReader(tbl, 2) 172 defer tr.Release() 173 174 rec, err := rr.Read() 175 assert.NoError(t, err) 176 tr.Next() 177 assert.Truef(t, array.RecordEqual(tr.Record(), rec), "expected: %s\ngot: %s", tr.Record(), rec) 178 179 rec, err = rr.Read() 180 assert.NoError(t, err) 181 tr.Next() 182 assert.Truef(t, array.RecordEqual(tr.Record(), rec), "expected: %s\ngot: %s", tr.Record(), rec) 183 184 rec, err = rr.Read() 185 assert.NoError(t, err) 186 tr.Next() 187 assert.Truef(t, array.RecordEqual(tr.Record(), rec), "expected: %s\ngot: %s", tr.Record(), rec) 188 189 rec, err = rr.Read() 190 assert.Same(t, io.EOF, err) 191 assert.Nil(t, rec) 192 }