github.com/apache/arrow/go/v14@v14.0.1/parquet/file/column_reader_test.go

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package file_test

import (
	"math"
	"math/rand"
	"reflect"
	"runtime"
	"sync"
	"testing"

	"github.com/apache/arrow/go/v14/arrow/memory"
	"github.com/apache/arrow/go/v14/internal/utils"
	"github.com/apache/arrow/go/v14/parquet"
	"github.com/apache/arrow/go/v14/parquet/file"
	"github.com/apache/arrow/go/v14/parquet/internal/testutils"
	"github.com/apache/arrow/go/v14/parquet/schema"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/suite"
)

func initValues(values reflect.Value) {
	if values.Kind() != reflect.Slice {
		panic("must init values with slice")
	}

	r := rand.New(rand.NewSource(0))
	typ := values.Type().Elem()
	switch {
	case typ.Kind() == reflect.Bool:
		for i := 0; i < values.Len(); i++ {
			values.Index(i).Set(reflect.ValueOf(r.Int31n(2) == 1))
		}
	case typ.Bits() <= 32:
		max := int64(math.MaxInt32)
		min := int64(math.MinInt32)
		for i := 0; i < values.Len(); i++ {
			values.Index(i).Set(reflect.ValueOf(r.Int63n(max-min+1) + min).Convert(reflect.TypeOf(int32(0))))
		}
	case typ.Bits() <= 64:
		// draw from the full int64 range; computing max-min+1 with
		// MaxInt64 and MinInt64 overflows to 0 and would panic Int63n
		for i := 0; i < values.Len(); i++ {
			values.Index(i).Set(reflect.ValueOf(int64(r.Uint64())))
		}
	}
}

func initDictValues(values reflect.Value, numDicts int) {
	repeatFactor := values.Len() / numDicts
	initValues(values)
	// add some repeated values
	for j := 1; j < repeatFactor; j++ {
		for i := 0; i < numDicts; i++ {
			values.Index(numDicts*j + i).Set(values.Index(i))
		}
	}
	// the repeats above cover only numDicts*repeatFactor values; if that is
	// fewer than values.Len(), fill the remainder by cycling from the start
	for i := numDicts * repeatFactor; i < values.Len(); i++ {
		values.Index(i).Set(values.Index(i - numDicts*repeatFactor))
	}
}

func makePages(version parquet.DataPageVersion, d *schema.Column, npages, lvlsPerPage int, typ reflect.Type, enc parquet.Encoding) ([]file.Page, int, reflect.Value, []int16, []int16) {
	nlevels := lvlsPerPage * npages
	nvalues := 0

	maxDef := d.MaxDefinitionLevel()
	maxRep := d.MaxRepetitionLevel()

	var (
		defLevels []int16
		repLevels []int16
	)

	valuesPerPage := make([]int, npages)
	if maxDef > 0 {
		defLevels = make([]int16, nlevels)
		testutils.FillRandomInt16(0, 0, maxDef, defLevels)
		for idx := range valuesPerPage {
			numPerPage := 0
			for i := 0; i < lvlsPerPage; i++ {
				if defLevels[i+idx*lvlsPerPage] == maxDef {
					numPerPage++
					nvalues++
				}
			}
			valuesPerPage[idx] = numPerPage
		}
	} else {
		nvalues = nlevels
		valuesPerPage[0] = lvlsPerPage
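		// fill the remaining entries by copying the already-initialized
		// prefix, doubling the filled region on each pass, so that every
		// page ends up holding exactly lvlsPerPage values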
		for i := 1; i < len(valuesPerPage); i *= 2 {
			copy(valuesPerPage[i:], valuesPerPage[:i])
		}
	}

	if maxRep > 0 {
		repLevels = make([]int16, nlevels)
		testutils.FillRandomInt16(0, 0, maxRep, repLevels)
	}

	values := reflect.MakeSlice(reflect.SliceOf(typ), nvalues, nvalues)
	if enc == parquet.Encodings.Plain {
		initValues(values)
		return testutils.PaginatePlain(version, d, values, defLevels, repLevels, maxDef, maxRep, lvlsPerPage, valuesPerPage, parquet.Encodings.Plain), nvalues, values, defLevels, repLevels
	} else if enc == parquet.Encodings.PlainDict || enc == parquet.Encodings.RLEDict {
		initDictValues(values, lvlsPerPage)
		return testutils.PaginateDict(version, d, values, defLevels, repLevels, maxDef, maxRep, lvlsPerPage, valuesPerPage, parquet.Encodings.RLEDict), nvalues, values, defLevels, repLevels
	}
	panic("invalid encoding type for make pages")
}

//lint:ignore U1000 compareVectorWithDefLevels
func compareVectorWithDefLevels(left, right reflect.Value, defLevels []int16, maxDef, maxRep int16) assert.Comparison {
	return func() bool {
		if left.Kind() != reflect.Slice || right.Kind() != reflect.Slice {
			return false
		}

		if left.Type().Elem() != right.Type().Elem() {
			return false
		}

		iLeft, iRight := 0, 0
		for _, def := range defLevels {
			if def == maxDef {
				if !reflect.DeepEqual(left.Index(iLeft).Interface(), right.Index(iRight).Interface()) {
					return false
				}
				iLeft++
				iRight++
			} else if def == (maxDef - 1) {
				// null entry on the lowest nested level
				iRight++
			} else if def < (maxDef - 1) {
				// null entry on a higher nesting level, only supported for non-repeated data
				if maxRep == 0 {
					iRight++
				}
			}
		}
		return true
	}
}

var mem = memory.DefaultAllocator

type PrimitiveReaderSuite struct {
	suite.Suite

	dataPageVersion parquet.DataPageVersion
	pager           file.PageReader
	reader          file.ColumnChunkReader
	pages           []file.Page
	values          reflect.Value
	defLevels       []int16
	repLevels       []int16
	nlevels         int
	nvalues         int
	maxDefLvl       int16
	maxRepLvl       int16

	bufferPool sync.Pool
}

func (p *PrimitiveReaderSuite) SetupTest() {
	p.bufferPool = sync.Pool{
		New: func() interface{} {
			buf := memory.NewResizableBuffer(mem)
			runtime.SetFinalizer(buf, func(obj *memory.Buffer) {
				obj.Release()
			})
			return buf
		},
	}
}

func (p *PrimitiveReaderSuite) TearDownTest() {
	p.clear()
	p.bufferPool = sync.Pool{}
}

func (p *PrimitiveReaderSuite) initReader(d *schema.Column) {
	m := new(testutils.MockPageReader)
	m.Test(p.T())
	m.TestData().Set("pages", p.pages)
	m.On("Err").Return((error)(nil))
	p.pager = m
	p.reader = file.NewColumnReader(d, m, mem, &p.bufferPool)
}

func (p *PrimitiveReaderSuite) checkResults(typ reflect.Type) {
	vresult := reflect.MakeSlice(reflect.SliceOf(typ), p.nvalues, p.nvalues)
	dresult := make([]int16, p.nlevels)
	rresult := make([]int16, p.nlevels)

	var (
		read        int64 = 0
		totalRead   int   = 0
		batchActual int   = 0
		batchSize   int32 = 8
		batch       int   = 0
	)

	p.Require().NotNil(p.reader)

	// this will cover both cases:
	// 1) batch size < page size (multiple ReadBatch calls from a single page)
	// 2) batch size > page size (ReadBatch is limited to a single page)
	for {
		switch rdr := p.reader.(type) {
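		// each typed reader decodes into a scratch slice sized to the
		// current batchSize; decoded values are copied into the
		// reflect-backed vresult for comparison against p.values below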
		case *file.Int32ColumnChunkReader:
			intVals := make([]int32, batchSize)
			read, batch, _ = rdr.ReadBatch(int64(batchSize), intVals, dresult[batchActual:], rresult[batchActual:])
			for i := 0; i < batch; i++ {
				vresult.Index(totalRead + i).Set(reflect.ValueOf(intVals[i]))
			}

		case *file.BooleanColumnChunkReader:
			boolVals := make([]bool, batchSize)
			read, batch, _ = rdr.ReadBatch(int64(batchSize), boolVals, dresult[batchActual:], rresult[batchActual:])
			for i := 0; i < batch; i++ {
				vresult.Index(totalRead + i).Set(reflect.ValueOf(boolVals[i]))
			}
		default:
			p.Fail("column reader not implemented")
		}

		totalRead += batch
		batchActual += int(read)
		batchSize = int32(utils.MinInt(1<<24, utils.MaxInt(int(batchSize*2), 4096)))
		if batch <= 0 {
			break
		}
	}

	p.Equal(p.nlevels, batchActual)
	p.Equal(p.nvalues, totalRead)
	p.Equal(p.values.Interface(), vresult.Interface())
	if p.maxDefLvl > 0 {
		p.Equal(p.defLevels, dresult)
	}
	if p.maxRepLvl > 0 {
		p.Equal(p.repLevels, rresult)
	}

	// catch improper writes at EOS
	switch rdr := p.reader.(type) {
	case *file.Int32ColumnChunkReader:
		intVals := make([]int32, batchSize)
		read, batchActual, _ = rdr.ReadBatch(5, intVals, nil, nil)
	case *file.BooleanColumnChunkReader:
		boolVals := make([]bool, batchSize)
		read, batchActual, _ = rdr.ReadBatch(5, boolVals, nil, nil)
	default:
		p.Fail("column reader not implemented")
	}

	p.Zero(batchActual)
	p.Zero(read)
}

func (p *PrimitiveReaderSuite) clear() {
	p.values = reflect.ValueOf(nil)
	p.defLevels = nil
	p.repLevels = nil
	p.pages = nil
	p.pager = nil
	p.reader = nil
}

func (p *PrimitiveReaderSuite) testPlain(npages, levels int, d *schema.Column, typ reflect.Type) {
	p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levels, typ, parquet.Encodings.Plain)
	p.nlevels = npages * levels
	p.initReader(d)
	p.checkResults(typ)
	p.clear()
}

func (p *PrimitiveReaderSuite) testDict(npages, levels int, d *schema.Column, typ reflect.Type) {
	p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levels, typ, parquet.Encodings.RLEDict)
	p.nlevels = npages * levels
	p.initReader(d)
	p.checkResults(typ)
	p.clear()
}

func (p *PrimitiveReaderSuite) TestBoolFlatRequired() {
	const (
		levelsPerPage int = 100
		npages        int = 50
	)

	p.maxDefLvl = 0
	p.maxRepLvl = 0
	typ := schema.NewBooleanNode("a", parquet.Repetitions.Required, -1)
	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
	p.testPlain(npages, levelsPerPage, d, reflect.TypeOf(true))
}

func (p *PrimitiveReaderSuite) TestBoolFlatOptional() {
	const (
		levelsPerPage int = 100
		npages        int = 50
	)

	p.maxDefLvl = 4
	p.maxRepLvl = 0
	typ := schema.NewBooleanNode("b", parquet.Repetitions.Optional, -1)
	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
	p.testPlain(npages, levelsPerPage, d, reflect.TypeOf(true))
}

func (p *PrimitiveReaderSuite) TestBoolFlatOptionalSkip() {
	const (
		levelsPerPage int = 1000
		npages        int = 5
	)

	p.maxDefLvl = 4
	p.maxRepLvl = 0
	typ := schema.NewBooleanNode("a", parquet.Repetitions.Optional, -1)
	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
	p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levelsPerPage, reflect.TypeOf(true), parquet.Encodings.Plain)
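	// Optional column: only levels equal to maxDefLvl carry a backing value,
	// so the subtests below advance rIdx through p.defLevels in lockstep with
	// each Skip/ReadBatch to keep `values` pointing at the next unread value.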
	p.initReader(d)

	vresult := make([]bool, levelsPerPage/2)
	dresult := make([]int16, levelsPerPage/2)
	rresult := make([]int16, levelsPerPage/2)

	rdr := p.reader.(*file.BooleanColumnChunkReader)

	values := p.values.Interface().([]bool)
	rIdx := int64(0)

	p.Run("skip_size > page_size", func() {
		// skip first 2 pages
		skipped, _ := rdr.Skip(int64(2 * levelsPerPage))
		// move test values forward
		for i := int64(0); i < skipped; i++ {
			if p.defLevels[rIdx] == p.maxDefLvl {
				values = values[1:]
			}
			rIdx++
		}
		p.Equal(int64(2*levelsPerPage), skipped)

		// read half a page
		rowsRead, valsRead, _ := rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
		subVals := values[0:valsRead]
		p.Equal(subVals, vresult[:valsRead])
		// move test values forward
		rIdx += rowsRead
		values = values[valsRead:]
	})

	p.Run("skip_size == page_size", func() {
		// skip one page worth of values across pages 2 and 3
		skipped, _ := rdr.Skip(int64(levelsPerPage))
		// move test values forward
		for i := int64(0); i < skipped; i++ {
			if p.defLevels[rIdx] == p.maxDefLvl {
				values = values[1:]
			}
			rIdx++
		}
		p.Equal(int64(levelsPerPage), skipped)

		// read half a page
		rowsRead, valsRead, _ := rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
		subVals := values[0:valsRead]
		p.Equal(subVals, vresult[:valsRead])
		// move test values forward
		rIdx += rowsRead
		values = values[valsRead:]
	})

	p.Run("skip_size < page_size", func() {
		// skips are limited to a single page; skip half a page
		skipped, _ := rdr.Skip(int64(levelsPerPage / 2))
		// move test values forward
		for i := int64(0); i < skipped; i++ {
			if p.defLevels[rIdx] == p.maxDefLvl {
				values = values[1:]
			}
			rIdx++
		}
		p.Equal(int64(0.5*float32(levelsPerPage)), skipped)

		// read half a page
		rowsRead, valsRead, _ := rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
		subVals := values[0:valsRead]
		p.Equal(subVals, vresult[:valsRead])
		// move test values forward
		rIdx += rowsRead
		values = values[valsRead:]
	})
}

func (p *PrimitiveReaderSuite) TestInt32FlatRequired() {
	const (
		levelsPerPage int = 100
		npages        int = 50
	)

	p.maxDefLvl = 0
	p.maxRepLvl = 0
	typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1)
	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
	p.testPlain(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
	p.testDict(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
}

func (p *PrimitiveReaderSuite) TestInt32FlatOptional() {
	const (
		levelsPerPage int = 100
		npages        int = 50
	)

	p.maxDefLvl = 4
	p.maxRepLvl = 0
	typ := schema.NewInt32Node("b", parquet.Repetitions.Optional, -1)
	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
	p.testPlain(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
	p.testDict(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
}

func (p *PrimitiveReaderSuite) TestInt32FlatRepeated() {
	const (
		levelsPerPage int = 100
		npages        int = 50
	)

	p.maxDefLvl = 4
	p.maxRepLvl = 2
	typ := schema.NewInt32Node("c", parquet.Repetitions.Repeated, -1)
	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
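	// maxRepLvl > 0: makePages also generates repetition levels here, and
	// checkResults verifies them alongside the definition levels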
	p.testPlain(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
	p.testDict(npages, levelsPerPage, d, reflect.TypeOf(int32(0)))
}

func (p *PrimitiveReaderSuite) TestReadBatchMultiPage() {
	const (
		levelsPerPage int = 100
		npages        int = 3
	)

	p.maxDefLvl = 0
	p.maxRepLvl = 0
	typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1)
	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
	p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levelsPerPage, reflect.TypeOf(int32(0)), parquet.Encodings.Plain)
	p.initReader(d)

	vresult := make([]int32, levelsPerPage*npages)
	dresult := make([]int16, levelsPerPage*npages)
	rresult := make([]int16, levelsPerPage*npages)

	rdr := p.reader.(*file.Int32ColumnChunkReader)
	total, read, err := rdr.ReadBatch(int64(levelsPerPage*npages), vresult, dresult, rresult)
	p.NoError(err)
	p.EqualValues(levelsPerPage*npages, total)
	p.EqualValues(levelsPerPage*npages, read)
}

func (p *PrimitiveReaderSuite) TestInt32FlatRequiredSkip() {
	const (
		levelsPerPage int = 100
		npages        int = 5
	)

	p.maxDefLvl = 0
	p.maxRepLvl = 0
	typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1)
	d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
	p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levelsPerPage, reflect.TypeOf(int32(0)), parquet.Encodings.Plain)
	p.initReader(d)

	vresult := make([]int32, levelsPerPage/2)
	dresult := make([]int16, levelsPerPage/2)
	rresult := make([]int16, levelsPerPage/2)

	rdr := p.reader.(*file.Int32ColumnChunkReader)

	p.Run("skip_size > page_size", func() {
		// skip first 2 pages
		skipped, _ := rdr.Skip(int64(2 * levelsPerPage))
		p.Equal(int64(2*levelsPerPage), skipped)

		rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
		subVals := p.values.Slice(2*levelsPerPage, int(2.5*float64(levelsPerPage))).Interface().([]int32)
		p.Equal(subVals, vresult)
	})

	p.Run("skip_size == page_size", func() {
		// skip across two pages
		skipped, _ := rdr.Skip(int64(levelsPerPage))
		p.Equal(int64(levelsPerPage), skipped)
		// read half a page
		rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
		subVals := p.values.Slice(int(3.5*float64(levelsPerPage)), 4*levelsPerPage).Interface().([]int32)
		p.Equal(subVals, vresult)
	})

	p.Run("skip_size < page_size", func() {
		// skips are limited to a single page; skip half a page
		skipped, _ := rdr.Skip(int64(levelsPerPage / 2))
		p.Equal(int64(0.5*float32(levelsPerPage)), skipped)
		// read half a page
		rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult)
		subVals := p.values.Slice(int(4.5*float64(levelsPerPage)), p.values.Len()).Interface().([]int32)
		p.Equal(subVals, vresult)
	})
}

func (p *PrimitiveReaderSuite) TestRepetitionLvlBytesWithMaxRepZero() {
	const batchSize = 4
	p.maxDefLvl = 1
	p.maxRepLvl = 0
	typ := schema.NewInt32Node("a", parquet.Repetitions.Optional, -1)
	descr := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
	// Bytes here came from the example parquet file in ARROW-17453's int32
	// column, which was delta bit-packed. The key part is the first three
	// bytes: the page header reports 1 byte for repetition levels even
	// though the max rep level is 0. If that byte isn't skipped then
	// we get def levels of [1, 1, 0, 0] instead of the correct [1, 1, 1, 0].
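	// The NewDataPageV2 call below carries that header metadata: per the
	// comment above, a 1-byte repetition-level length is declared even
	// though maxRepLvl is 0, so the reader must still skip that byte.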
	pageData := [...]byte{0x3, 0x3, 0x7, 0x80, 0x1, 0x4, 0x3,
		0x18, 0x1, 0x2, 0x0, 0x0, 0x0, 0xc,
		0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}

	p.pages = append(p.pages, file.NewDataPageV2(memory.NewBufferBytes(pageData[:]), batchSize, 1, batchSize,
		parquet.Encodings.DeltaBinaryPacked, 2, 1, int32(len(pageData)), false))

	p.initReader(descr)
	p.NotPanics(func() { p.reader.HasNext() })

	var (
		values  [4]int32
		defLvls [4]int16
	)
	i32Rdr := p.reader.(*file.Int32ColumnChunkReader)
	total, read, err := i32Rdr.ReadBatch(batchSize, values[:], defLvls[:], nil)
	p.NoError(err)
	p.EqualValues(batchSize, total)
	p.EqualValues(3, read)
	p.Equal([]int16{1, 1, 1, 0}, defLvls[:])
	p.Equal([]int32{12, 11, 13, 0}, values[:])
}

func (p *PrimitiveReaderSuite) TestDictionaryEncodedPages() {
	p.maxDefLvl = 0
	p.maxRepLvl = 0
	typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1)
	descr := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl)
	dummy := memory.NewResizableBuffer(mem)

	p.Run("Dict: Plain, Data: RLEDict", func() {
		dictPage := file.NewDictionaryPage(dummy, 0, parquet.Encodings.Plain)
		dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.RLEDict, dummy, nil, nil, 0, 0)

		p.pages = append(p.pages, dictPage, dataPage)
		p.initReader(descr)
		p.NotPanics(func() { p.reader.HasNext() })
		p.NoError(p.reader.Err())
		p.pages = p.pages[:0]
	})

	p.Run("Dict: Plain Dictionary, Data: Plain Dictionary", func() {
		dictPage := file.NewDictionaryPage(dummy, 0, parquet.Encodings.PlainDict)
		dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.PlainDict, dummy, nil, nil, 0, 0)
		p.pages = append(p.pages, dictPage, dataPage)
		p.initReader(descr)
		p.NotPanics(func() { p.reader.HasNext() })
		p.NoError(p.reader.Err())
		p.pages = p.pages[:0]
	})

	p.Run("Panic if dict page not first", func() {
		dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.RLEDict, dummy, nil, nil, 0, 0)
		p.pages = append(p.pages, dataPage)
		p.initReader(descr)
		p.NotPanics(func() { p.False(p.reader.HasNext()) })
		p.Error(p.reader.Err())
		p.pages = p.pages[:0]
	})

	p.Run("Only RLE is supported", func() {
		dictPage := file.NewDictionaryPage(dummy, 0, parquet.Encodings.DeltaByteArray)
		p.pages = append(p.pages, dictPage)
		p.initReader(descr)
		p.NotPanics(func() { p.False(p.reader.HasNext()) })
		p.Error(p.reader.Err())
		p.pages = p.pages[:0]
	})

	p.Run("Cannot have more than one dict", func() {
		dictPage1 := file.NewDictionaryPage(dummy, 0, parquet.Encodings.PlainDict)
		dictPage2 := file.NewDictionaryPage(dummy, 0, parquet.Encodings.Plain)
		p.pages = append(p.pages, dictPage1, dictPage2)
		p.initReader(descr)
		p.NotPanics(func() { p.False(p.reader.HasNext()) })
		p.Error(p.reader.Err())
		p.pages = p.pages[:0]
	})

	p.Run("Unsupported encoding", func() {
		dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.DeltaByteArray, dummy, nil, nil, 0, 0)
		p.pages = append(p.pages, dataPage)
		p.initReader(descr)
		p.Panics(func() { p.reader.HasNext() })
		// p.Error(p.reader.Err())
		p.pages = p.pages[:0]
	})

	p.pages = p.pages[:2]
}
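
// TestPrimitiveReader runs the whole suite once per data page format, so every
// case above is exercised against both V1 (the suite's zero value) and V2.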
func TestPrimitiveReader(t *testing.T) {
	t.Parallel()
	t.Run("datapage v1", func(t *testing.T) {
		suite.Run(t, new(PrimitiveReaderSuite))
	})
	t.Run("datapage v2", func(t *testing.T) {
		suite.Run(t, &PrimitiveReaderSuite{dataPageVersion: parquet.DataPageV2})
	})
}
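
// TestExampleTypedReadBatch is an illustrative sketch added to this listing
// (it is not part of the upstream suite): it shows the typed ReadBatch flow
// the tests above exercise, end to end, using only helpers defined in this
// file. Pages are generated with makePages, served through a mock pager
// exactly as initReader does, and drained in batches; the batch size of 64
// is an arbitrary choice.
func TestExampleTypedReadBatch(t *testing.T) {
	typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1)
	descr := schema.NewColumn(typ, 0 /*maxDef*/, 0 /*maxRep*/)

	// two pages of 100 required values each, so nvalues == 200
	pages, nvalues, _, _, _ := makePages(parquet.DataPageV1, descr, 2, 100,
		reflect.TypeOf(int32(0)), parquet.Encodings.Plain)

	// wire the pages through a mock pager, mirroring initReader
	m := new(testutils.MockPageReader)
	m.Test(t)
	m.TestData().Set("pages", pages)
	m.On("Err").Return((error)(nil))

	pool := sync.Pool{New: func() interface{} { return memory.NewResizableBuffer(mem) }}
	rdr := file.NewColumnReader(descr, m, mem, &pool).(*file.Int32ColumnChunkReader)

	// drain the column chunk; for a required column the def/rep level
	// slices may be nil
	out := make([]int32, nvalues)
	got := 0
	for got < nvalues {
		batch := int64(64)
		if rem := int64(nvalues - got); rem < batch {
			batch = rem
		}
		_, n, err := rdr.ReadBatch(batch, out[got:], nil, nil)
		if err != nil {
			t.Fatal(err)
		}
		if n == 0 {
			break
		}
		got += n
	}
	if got != nvalues {
		t.Fatalf("read %d values, want %d", got, nvalues)
	}
}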