github.com/apache/arrow/go/v14@v14.0.2/parquet/file/column_reader_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package file_test
    18  
    19  import (
    20  	"math"
    21  	"math/rand"
    22  	"reflect"
    23  	"runtime"
    24  	"sync"
    25  	"testing"
    26  
    27  	"github.com/apache/arrow/go/v14/arrow/memory"
    28  	"github.com/apache/arrow/go/v14/internal/utils"
    29  	"github.com/apache/arrow/go/v14/parquet"
    30  	"github.com/apache/arrow/go/v14/parquet/file"
    31  	"github.com/apache/arrow/go/v14/parquet/internal/testutils"
    32  	"github.com/apache/arrow/go/v14/parquet/schema"
    33  	"github.com/stretchr/testify/assert"
    34  	"github.com/stretchr/testify/suite"
    35  )
    36  
    37  func initValues(values reflect.Value) {
    38  	if values.Kind() != reflect.Slice {
    39  		panic("must init values with slice")
    40  	}
    41  
    42  	r := rand.New(rand.NewSource(0))
    43  	typ := values.Type().Elem()
    44  	switch {
    45  	case typ.Kind() == reflect.Bool:
    46  		for i := 0; i < values.Len(); i++ {
    47  			values.Index(i).Set(reflect.ValueOf(r.Int31n(2) == 1))
    48  		}
    49  	case typ.Bits() <= 32:
    50  		max := int64(math.MaxInt32)
    51  		min := int64(math.MinInt32)
    52  		for i := 0; i < values.Len(); i++ {
    53  			values.Index(i).Set(reflect.ValueOf(r.Int63n(max-min+1) + min).Convert(reflect.TypeOf(int32(0))))
    54  		}
    55  	case typ.Bits() <= 64:
    56  		max := int64(math.MaxInt64)
    57  		min := int64(math.MinInt64)
    58  		for i := 0; i < values.Len(); i++ {
    59  			values.Index(i).Set(reflect.ValueOf(r.Int63n(max-min+1) + min))
    60  		}
    61  	}
    62  }
    63  
    64  func initDictValues(values reflect.Value, numDicts int) {
    65  	repeatFactor := values.Len() / numDicts
    66  	initValues(values)
    67  	// add some repeated values
    68  	for j := 1; j < repeatFactor; j++ {
    69  		for i := 0; i < numDicts; i++ {
    70  			values.Index(numDicts*j + i).Set(values.Index(i))
    71  		}
    72  	}
    73  	// computed only dict_per_page * repeat_factor - 1 values < num_values compute remaining
    74  	for i := numDicts * repeatFactor; i < values.Len(); i++ {
    75  		values.Index(i).Set(values.Index(i - numDicts*repeatFactor))
    76  	}
    77  }
    78  
    79  func makePages(version parquet.DataPageVersion, d *schema.Column, npages, lvlsPerPage int, typ reflect.Type, enc parquet.Encoding) ([]file.Page, int, reflect.Value, []int16, []int16) {
    80  	nlevels := lvlsPerPage * npages
    81  	nvalues := 0
    82  
    83  	maxDef := d.MaxDefinitionLevel()
    84  	maxRep := d.MaxRepetitionLevel()
    85  
    86  	var (
    87  		defLevels []int16
    88  		repLevels []int16
    89  	)
    90  
    91  	valuesPerPage := make([]int, npages)
    92  	if maxDef > 0 {
    93  		defLevels = make([]int16, nlevels)
    94  		testutils.FillRandomInt16(0, 0, maxDef, defLevels)
    95  		for idx := range valuesPerPage {
    96  			numPerPage := 0
    97  			for i := 0; i < lvlsPerPage; i++ {
    98  				if defLevels[i+idx*lvlsPerPage] == maxDef {
    99  					numPerPage++
   100  					nvalues++
   101  				}
   102  			}
   103  			valuesPerPage[idx] = numPerPage
   104  		}
   105  	} else {
   106  		nvalues = nlevels
   107  		valuesPerPage[0] = lvlsPerPage
   108  		for i := 1; i < len(valuesPerPage); i *= 2 {
   109  			copy(valuesPerPage[i:], valuesPerPage[:i])
   110  		}
   111  	}
   112  
   113  	if maxRep > 0 {
   114  		repLevels = make([]int16, nlevels)
   115  		testutils.FillRandomInt16(0, 0, maxRep, repLevels)
   116  	}
   117  
   118  	values := reflect.MakeSlice(reflect.SliceOf(typ), nvalues, nvalues)
   119  	if enc == parquet.Encodings.Plain {
   120  		initValues(values)
   121  		return testutils.PaginatePlain(version, d, values, defLevels, repLevels, maxDef, maxRep, lvlsPerPage, valuesPerPage, parquet.Encodings.Plain), nvalues, values, defLevels, repLevels
   122  	} else if enc == parquet.Encodings.PlainDict || enc == parquet.Encodings.RLEDict {
   123  		initDictValues(values, lvlsPerPage)
   124  		return testutils.PaginateDict(version, d, values, defLevels, repLevels, maxDef, maxRep, lvlsPerPage, valuesPerPage, parquet.Encodings.RLEDict), nvalues, values, defLevels, repLevels
   125  	}
   126  	panic("invalid encoding type for make pages")
   127  }
   128  
   129  //lint:ignore U1000 compareVectorWithDefLevels
   130  func compareVectorWithDefLevels(left, right reflect.Value, defLevels []int16, maxDef, maxRep int16) assert.Comparison {
   131  	return func() bool {
   132  		if left.Kind() != reflect.Slice || right.Kind() != reflect.Slice {
   133  			return false
   134  		}
   135  
   136  		if left.Type().Elem() != right.Type().Elem() {
   137  			return false
   138  		}
   139  
   140  		iLeft, iRight := 0, 0
   141  		for _, def := range defLevels {
   142  			if def == maxDef {
   143  				if !reflect.DeepEqual(left.Index(iLeft).Interface(), right.Index(iRight).Interface()) {
   144  					return false
   145  				}
   146  				iLeft++
   147  				iRight++
   148  			} else if def == (maxDef - 1) {
   149  				// null entry on the lowest nested level
   150  				iRight++
   151  			} else if def < (maxDef - 1) {
   152  				// null entry on higher nesting level, only supported for non-repeating data
   153  				if maxRep == 0 {
   154  					iRight++
   155  				}
   156  			}
   157  		}
   158  		return true
   159  	}
   160  }
   161  
// mem is the allocator used throughout these tests, both for the pooled
// resizable buffers and for the column readers under test.
var mem = memory.DefaultAllocator
   163  
// PrimitiveReaderSuite is a testify suite exercising file.ColumnChunkReader
// implementations against pages served by a mock page reader.
//
// dataPageVersion selects the data page version handed to the page builders.
// pager and reader are installed by initReader; pages is the page list the
// mock serves. values, defLevels and repLevels hold the expected data
// produced by makePages, nvalues and nlevels their expected counts, and
// maxDefLvl / maxRepLvl the levels of the column currently under test.
type PrimitiveReaderSuite struct {
	suite.Suite

	dataPageVersion parquet.DataPageVersion
	pager           file.PageReader
	reader          file.ColumnChunkReader
	pages           []file.Page
	values          reflect.Value
	defLevels       []int16
	repLevels       []int16
	nlevels         int
	nvalues         int
	maxDefLvl       int16
	maxRepLvl       int16

	// bufferPool is recreated for every test by SetupTest and handed to the
	// column reader by initReader.
	bufferPool sync.Pool
}
   181  
   182  func (p *PrimitiveReaderSuite) SetupTest() {
   183  	p.bufferPool = sync.Pool{
   184  		New: func() interface{} {
   185  			buf := memory.NewResizableBuffer(mem)
   186  			runtime.SetFinalizer(buf, func(obj *memory.Buffer) {
   187  				obj.Release()
   188  			})
   189  			return buf
   190  		},
   191  	}
   192  }
   193  
// TearDownTest drops all per-test state after each test: the generated data,
// pages and readers are cleared and the buffer pool is replaced with an empty
// one.
func (p *PrimitiveReaderSuite) TearDownTest() {
	p.clear()
	p.bufferPool = sync.Pool{}
}
   198  
   199  func (p *PrimitiveReaderSuite) initReader(d *schema.Column) {
   200  	m := new(testutils.MockPageReader)
   201  	m.Test(p.T())
   202  	m.TestData().Set("pages", p.pages)
   203  	m.On("Err").Return((error)(nil))
   204  	p.pager = m
   205  	p.reader = file.NewColumnReader(d, m, mem, &p.bufferPool)
   206  }
   207  
   208  func (p *PrimitiveReaderSuite) checkResults(typ reflect.Type) {
   209  	vresult := reflect.MakeSlice(reflect.SliceOf(typ), p.nvalues, p.nvalues)
   210  	dresult := make([]int16, p.nlevels)
   211  	rresult := make([]int16, p.nlevels)
   212  
   213  	var (
   214  		read        int64 = 0
   215  		totalRead   int   = 0
   216  		batchActual int   = 0
   217  		batchSize   int32 = 8
   218  		batch       int   = 0
   219  	)
   220  
   221  	p.Require().NotNil(p.reader)
   222  
   223  	// this will cover both cases:
   224  	// 1) batch size < page size (multiple ReadBatch from a single page)
   225  	// 2) batch size > page size (BatchRead limits to single page)
   226  	for {
   227  		switch rdr := p.reader.(type) {
   228  		case *file.Int32ColumnChunkReader:
   229  			intVals := make([]int32, batchSize)
   230  			read, batch, _ = rdr.ReadBatch(int64(batchSize), intVals, dresult[batchActual:], rresult[batchActual:])
   231  			for i := 0; i < batch; i++ {
   232  				vresult.Index(totalRead + i).Set(reflect.ValueOf(intVals[i]))
   233  			}
   234  
   235  		case *file.BooleanColumnChunkReader:
   236  			boolVals := make([]bool, batchSize)
   237  			read, batch, _ = rdr.ReadBatch(int64(batchSize), boolVals, dresult[batchActual:], rresult[batchActual:])
   238  			for i := 0; i < batch; i++ {
   239  				vresult.Index(totalRead + i).Set(reflect.ValueOf(boolVals[i]))
   240  			}
   241  		default:
   242  			p.Fail("column reader not implemented")
   243  		}
   244  
   245  		totalRead += batch
   246  		batchActual += int(read)
   247  		batchSize = int32(utils.MinInt(1<<24, utils.MaxInt(int(batchSize*2), 4096)))
   248  		if batch <= 0 {
   249  			break
   250  		}
   251  	}
   252  
   253  	p.Equal(p.nlevels, batchActual)
   254  	p.Equal(p.nvalues, totalRead)
   255  	p.Equal(p.values.Interface(), vresult.Interface())
   256  	if p.maxDefLvl > 0 {
   257  		p.Equal(p.defLevels, dresult)
   258  	}
   259  	if p.maxRepLvl > 0 {
   260  		p.Equal(p.repLevels, rresult)
   261  	}
   262  
   263  	// catch improper writes at EOS
   264  	switch rdr := p.reader.(type) {
   265  	case *file.Int32ColumnChunkReader:
   266  		intVals := make([]int32, batchSize)
   267  		read, batchActual, _ = rdr.ReadBatch(5, intVals, nil, nil)
   268  	case *file.BooleanColumnChunkReader:
   269  		boolVals := make([]bool, batchSize)
   270  		read, batchActual, _ = rdr.ReadBatch(5, boolVals, nil, nil)
   271  	default:
   272  		p.Fail("column reader not implemented")
   273  	}
   274  
   275  	p.Zero(batchActual)
   276  	p.Zero(read)
   277  }
   278  
   279  func (p *PrimitiveReaderSuite) clear() {
   280  	p.values = reflect.ValueOf(nil)
   281  	p.defLevels = nil
   282  	p.repLevels = nil
   283  	p.pages = nil
   284  	p.pager = nil
   285  	p.reader = nil
   286  }
   287  
   288  func (p *PrimitiveReaderSuite) testPlain(npages, levels int, d *schema.Column, typ reflect.Type) {
   289  	p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levels, typ, parquet.Encodings.Plain)
   290  	p.nlevels = npages * levels
   291  	p.initReader(d)
   292  	p.checkResults(typ)
   293  	p.clear()
   294  }
   295  
   296  func (p *PrimitiveReaderSuite) testDict(npages, levels int, d *schema.Column, typ reflect.Type) {
   297  	p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levels, typ, parquet.Encodings.RLEDict)
   298  	p.nlevels = npages * levels
   299  	p.initReader(d)
   300  	p.checkResults(typ)
   301  	p.clear()
   302  }
   303  
   304  func (p *PrimitiveReaderSuite) TestBoolFlatRequired() {
   305  	const (
   306  		levelsPerPage int = 100
   307  		npages        int = 50
   308  	)
   309  
   310  	p.maxDefLvl = 0
   311  	p.maxRepLvl = 0
   312  	typ := schema.NewBooleanNode("a", parquet.Repetitions.Required, -1)
   313  	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
   314  	p.testPlain(npages, levelsPerPage, d, reflect.TypeOf(true))
   315  }
   316  
   317  func (p *PrimitiveReaderSuite) TestBoolFlatOptional() {
   318  	const (
   319  		levelsPerPage int = 100
   320  		npages        int = 50
   321  	)
   322  
   323  	p.maxDefLvl = 4
   324  	p.maxRepLvl = 0
   325  	typ := schema.NewBooleanNode("b", parquet.Repetitions.Optional, -1)
   326  	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
   327  	p.testPlain(npages, levelsPerPage, d, reflect.TypeOf(true))
   328  }
   329  
   330  func (p *PrimitiveReaderSuite) TestBoolFlatOptionalSkip() {
   331  	const (
   332  		levelsPerPage int = 1000
   333  		npages        int = 5
   334  	)
   335  
   336  	p.maxDefLvl = 4
   337  	p.maxRepLvl = 0
   338  	typ := schema.NewBooleanNode("a", parquet.Repetitions.Optional, -1)
   339  	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
   340  	p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levelsPerPage, reflect.TypeOf(true), parquet.Encodings.Plain)
   341  	p.initReader(d)
   342  
   343  	vresult := make([]bool, levelsPerPage/2)
   344  	dresult := make([]int16, levelsPerPage/2)
   345  	rresult := make([]int16, levelsPerPage/2)
   346  
   347  	rdr := p.reader.(*file.BooleanColumnChunkReader)
   348  
   349  	values := p.values.Interface().([]bool)
   350  	rIdx := int64(0)
   351  
   352  	p.Run("skip_size > page_size", func() {
   353  		// skip first 2 pages
   354  		skipped, _ := rdr.Skip(int64(2 * levelsPerPage))
   355  		// move test values forward
   356  		for i := int64(0); i < skipped; i++ {
   357  			if p.defLevels[rIdx] == p.maxDefLvl {
   358  				values = values[1:]
   359  			}
   360  			rIdx++
   361  		}
   362  		p.Equal(int64(2*levelsPerPage), skipped)
   363  
   364  		// Read half a page
   365  		rowsRead, valsRead, _ := rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
   366  		subVals := values[0:valsRead]
   367  		p.Equal(subVals, vresult[:valsRead])
   368  		// move test values forward
   369  		rIdx += rowsRead
   370  		values = values[valsRead:]
   371  	})
   372  
   373  	p.Run("skip_size == page_size", func() {
   374  		// skip one page worth of values across page 2 and 3
   375  		skipped, _ := rdr.Skip(int64(levelsPerPage))
   376  		// move test values forward
   377  		for i := int64(0); i < skipped; i++ {
   378  			if p.defLevels[rIdx] == p.maxDefLvl {
   379  				values = values[1:]
   380  			}
   381  			rIdx++
   382  		}
   383  		p.Equal(int64(levelsPerPage), skipped)
   384  
   385  		// read half a page
   386  		rowsRead, valsRead, _ := rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
   387  		subVals := values[0:valsRead]
   388  		p.Equal(subVals, vresult[:valsRead])
   389  		// move test values forward
   390  		rIdx += rowsRead
   391  		values = values[valsRead:]
   392  	})
   393  
   394  	p.Run("skip_size < page_size", func() {
   395  		// skip limited to a single page
   396  		// skip half a page
   397  		skipped, _ := rdr.Skip(int64(levelsPerPage / 2))
   398  		// move test values forward
   399  		for i := int64(0); i < skipped; i++ {
   400  			if p.defLevels[rIdx] == p.maxDefLvl {
   401  				values = values[1:] // move test values forward
   402  			}
   403  			rIdx++
   404  		}
   405  		p.Equal(int64(0.5*float32(levelsPerPage)), skipped)
   406  
   407  		// Read half a page
   408  		rowsRead, valsRead, _ := rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
   409  		subVals := values[0:valsRead]
   410  		p.Equal(subVals, vresult[:valsRead])
   411  		// move test values forward
   412  		rIdx += rowsRead
   413  		values = values[valsRead:]
   414  	})
   415  }
   416  
   417  func (p *PrimitiveReaderSuite) TestInt32FlatRequired() {
   418  	const (
   419  		levelsPerPage int = 100
   420  		npages        int = 50
   421  	)
   422  
   423  	p.maxDefLvl = 0
   424  	p.maxRepLvl = 0
   425  	typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1)
   426  	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
   427  	p.testPlain(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
   428  	p.testDict(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
   429  }
   430  
   431  func (p *PrimitiveReaderSuite) TestInt32FlatOptional() {
   432  	const (
   433  		levelsPerPage int = 100
   434  		npages        int = 50
   435  	)
   436  
   437  	p.maxDefLvl = 4
   438  	p.maxRepLvl = 0
   439  	typ := schema.NewInt32Node("b", parquet.Repetitions.Optional, -1)
   440  	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
   441  	p.testPlain(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
   442  	p.testDict(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
   443  }
   444  
   445  func (p *PrimitiveReaderSuite) TestInt32FlatRepeated() {
   446  	const (
   447  		levelsPerPage int = 100
   448  		npages        int = 50
   449  	)
   450  
   451  	p.maxDefLvl = 4
   452  	p.maxRepLvl = 2
   453  	typ := schema.NewInt32Node("c", parquet.Repetitions.Repeated, -1)
   454  	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
   455  	p.testPlain(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
   456  	p.testDict(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
   457  }
   458  
   459  func (p *PrimitiveReaderSuite) TestReadBatchMultiPage() {
   460  	const (
   461  		levelsPerPage int = 100
   462  		npages        int = 3
   463  	)
   464  
   465  	p.maxDefLvl = 0
   466  	p.maxRepLvl = 0
   467  	typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1)
   468  	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
   469  	p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levelsPerPage, reflect.TypeOf(int32(0)), parquet.Encodings.Plain)
   470  	p.initReader(d)
   471  
   472  	vresult := make([]int32, levelsPerPage*npages)
   473  	dresult := make([]int16, levelsPerPage*npages)
   474  	rresult := make([]int16, levelsPerPage*npages)
   475  
   476  	rdr := p.reader.(*file.Int32ColumnChunkReader)
   477  	total, read, err := rdr.ReadBatch(int64(levelsPerPage*npages), vresult, dresult, rresult)
   478  	p.NoError(err)
   479  	p.EqualValues(levelsPerPage*npages, total)
   480  	p.EqualValues(levelsPerPage*npages, read)
   481  }
   482  
   483  func (p *PrimitiveReaderSuite) TestInt32FlatRequiredSkip() {
   484  	const (
   485  		levelsPerPage int = 100
   486  		npages        int = 5
   487  	)
   488  
   489  	p.maxDefLvl = 0
   490  	p.maxRepLvl = 0
   491  	typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1)
   492  	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
   493  	p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levelsPerPage, reflect.TypeOf(int32(0)), parquet.Encodings.Plain)
   494  	p.initReader(d)
   495  
   496  	vresult := make([]int32, levelsPerPage/2)
   497  	dresult := make([]int16, levelsPerPage/2)
   498  	rresult := make([]int16, levelsPerPage/2)
   499  
   500  	rdr := p.reader.(*file.Int32ColumnChunkReader)
   501  
   502  	p.Run("skip_size > page_size", func() {
   503  		// Skip first 2 pages
   504  		skipped, _ := rdr.Skip(int64(2 * levelsPerPage))
   505  		p.Equal(int64(2*levelsPerPage), skipped)
   506  
   507  		rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
   508  		subVals := p.values.Slice(2*levelsPerPage, int(2.5*float64(levelsPerPage))).Interface().([]int32)
   509  		p.Equal(subVals, vresult)
   510  	})
   511  
   512  	p.Run("skip_size == page_size", func() {
   513  		// skip across two pages
   514  		skipped, _ := rdr.Skip(int64(levelsPerPage))
   515  		p.Equal(int64(levelsPerPage), skipped)
   516  		// read half a page
   517  		rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
   518  		subVals := p.values.Slice(int(3.5*float64(levelsPerPage)), 4*levelsPerPage).Interface().([]int32)
   519  		p.Equal(subVals, vresult)
   520  	})
   521  
   522  	p.Run("skip_size < page_size", func() {
   523  		// skip limited to a single page
   524  		// Skip half a page
   525  		skipped, _ := rdr.Skip(int64(levelsPerPage / 2))
   526  		p.Equal(int64(0.5*float32(levelsPerPage)), skipped)
   527  		// Read half a page
   528  		rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
   529  		subVals := p.values.Slice(int(4.5*float64(levelsPerPage)), p.values.Len()).Interface().([]int32)
   530  		p.Equal(subVals, vresult)
   531  	})
   532  }
   533  
   534  func (p *PrimitiveReaderSuite) TestRepetitionLvlBytesWithMaxRepZero() {
   535  	const batchSize = 4
   536  	p.maxDefLvl = 1
   537  	p.maxRepLvl = 0
   538  	typ := schema.NewInt32Node("a", parquet.Repetitions.Optional, -1)
   539  	descr := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
   540  	// Bytes here came from the example parquet file in ARROW-17453's int32
   541  	// column which was delta bit-packed. The key part is the first three
   542  	// bytes: the page header reports 1 byte for repetition levels even
   543  	// though the max rep level is 0. If that byte isn't skipped then
   544  	// we get def levels of [1, 1, 0, 0] instead of the correct [1, 1, 1, 0].
   545  	pageData := [...]byte{0x3, 0x3, 0x7, 0x80, 0x1, 0x4, 0x3,
   546  		0x18, 0x1, 0x2, 0x0, 0x0, 0x0, 0xc,
   547  		0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}
   548  
   549  	p.pages = append(p.pages, file.NewDataPageV2(memory.NewBufferBytes(pageData[:]), batchSize, 1, batchSize,
   550  		parquet.Encodings.DeltaBinaryPacked, 2, 1, int32(len(pageData)), false))
   551  
   552  	p.initReader(descr)
   553  	p.NotPanics(func() { p.reader.HasNext() })
   554  
   555  	var (
   556  		values  [4]int32
   557  		defLvls [4]int16
   558  	)
   559  	i32Rdr := p.reader.(*file.Int32ColumnChunkReader)
   560  	total, read, err := i32Rdr.ReadBatch(batchSize, values[:], defLvls[:], nil)
   561  	p.NoError(err)
   562  	p.EqualValues(batchSize, total)
   563  	p.EqualValues(3, read)
   564  	p.Equal([]int16{1, 1, 1, 0}, defLvls[:])
   565  	p.Equal([]int32{12, 11, 13, 0}, values[:])
   566  }
   567  
   568  func (p *PrimitiveReaderSuite) TestDictionaryEncodedPages() {
   569  	p.maxDefLvl = 0
   570  	p.maxRepLvl = 0
   571  	typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1)
   572  	descr := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
   573  	dummy := memory.NewResizableBuffer(mem)
   574  
   575  	p.Run("Dict: Plain, Data: RLEDict", func() {
   576  		dictPage := file.NewDictionaryPage(dummy, 0, parquet.Encodings.Plain)
   577  		dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.RLEDict, dummy, nil, nil, 0, 0)
   578  
   579  		p.pages = append(p.pages, dictPage, dataPage)
   580  		p.initReader(descr)
   581  		p.NotPanics(func() { p.reader.HasNext() })
   582  		p.NoError(p.reader.Err())
   583  		p.pages = p.pages[:0]
   584  	})
   585  
   586  	p.Run("Dict: Plain Dictionary, Data: Plain Dictionary", func() {
   587  		dictPage := file.NewDictionaryPage(dummy, 0, parquet.Encodings.PlainDict)
   588  		dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.PlainDict, dummy, nil, nil, 0, 0)
   589  		p.pages = append(p.pages, dictPage, dataPage)
   590  		p.initReader(descr)
   591  		p.NotPanics(func() { p.reader.HasNext() })
   592  		p.NoError(p.reader.Err())
   593  		p.pages = p.pages[:0]
   594  	})
   595  
   596  	p.Run("Panic if dict page not first", func() {
   597  		dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.RLEDict, dummy, nil, nil, 0, 0)
   598  		p.pages = append(p.pages, dataPage)
   599  		p.initReader(descr)
   600  		p.NotPanics(func() { p.False(p.reader.HasNext()) })
   601  		p.Error(p.reader.Err())
   602  		p.pages = p.pages[:0]
   603  	})
   604  
   605  	p.Run("Only RLE is supported", func() {
   606  		dictPage := file.NewDictionaryPage(dummy, 0, parquet.Encodings.DeltaByteArray)
   607  		p.pages = append(p.pages, dictPage)
   608  		p.initReader(descr)
   609  		p.NotPanics(func() { p.False(p.reader.HasNext()) })
   610  		p.Error(p.reader.Err())
   611  		p.pages = p.pages[:0]
   612  	})
   613  
   614  	p.Run("Cannot have more than one dict", func() {
   615  		dictPage1 := file.NewDictionaryPage(dummy, 0, parquet.Encodings.PlainDict)
   616  		dictPage2 := file.NewDictionaryPage(dummy, 0, parquet.Encodings.Plain)
   617  		p.pages = append(p.pages, dictPage1, dictPage2)
   618  		p.initReader(descr)
   619  		p.NotPanics(func() { p.False(p.reader.HasNext()) })
   620  		p.Error(p.reader.Err())
   621  		p.pages = p.pages[:0]
   622  	})
   623  
   624  	p.Run("Unsupported encoding", func() {
   625  		dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.DeltaByteArray, dummy, nil, nil, 0, 0)
   626  		p.pages = append(p.pages, dataPage)
   627  		p.initReader(descr)
   628  		p.Panics(func() { p.reader.HasNext() })
   629  		// p.Error(p.reader.Err())
   630  		p.pages = p.pages[:0]
   631  	})
   632  
   633  	p.pages = p.pages[:2]
   634  }
   635  
   636  func TestPrimitiveReader(t *testing.T) {
   637  	t.Parallel()
   638  	t.Run("datapage v1", func(t *testing.T) {
   639  		suite.Run(t, new(PrimitiveReaderSuite))
   640  	})
   641  	t.Run("datapage v2", func(t *testing.T) {
   642  		suite.Run(t, &PrimitiveReaderSuite{dataPageVersion: parquet.DataPageV2})
   643  	})
   644  }