github.com/apache/arrow/go/v7@v7.0.1/parquet/file/page_reader.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package file 18 19 import ( 20 "bytes" 21 "io" 22 "sync" 23 24 "github.com/JohnCGriffin/overflow" 25 "github.com/apache/arrow/go/v7/arrow/ipc" 26 "github.com/apache/arrow/go/v7/arrow/memory" 27 "github.com/apache/arrow/go/v7/parquet" 28 "github.com/apache/arrow/go/v7/parquet/compress" 29 "github.com/apache/arrow/go/v7/parquet/internal/encryption" 30 format "github.com/apache/arrow/go/v7/parquet/internal/gen-go/parquet" 31 "github.com/apache/arrow/go/v7/parquet/internal/thrift" 32 "github.com/apache/arrow/go/v7/parquet/metadata" 33 "golang.org/x/xerrors" 34 ) 35 36 // PageReader is the interface used by the columnreader in order to read 37 // and handle DataPages and loop through them. 38 type PageReader interface { 39 // Set the maximum Page header size allowed to be read 40 SetMaxPageHeaderSize(int) 41 // Return the current page, or nil if there are no more 42 Page() Page 43 // Fetch the next page, returns false if there are no more pages 44 Next() bool 45 // if Next returns false, Err will return the error encountered or 46 // nil if there was no error and you just hit the end of the page 47 Err() error 48 // Reset allows reusing a page reader 49 Reset(r parquet.ReaderAtSeeker, nrows int64, compressType compress.Compression, ctx *CryptoContext) 50 } 51 52 // Page is an interface for handling DataPages or Dictionary Pages 53 type Page interface { 54 // Returns which kind of page this is 55 Type() format.PageType 56 // Get the raw bytes of this page 57 Data() []byte 58 // return the encoding used for this page, Plain/RLE, etc. 59 Encoding() format.Encoding 60 // get the number of values in this page 61 NumValues() int32 62 // release this page object back into the page pool for re-use 63 Release() 64 } 65 66 type page struct { 67 buf *memory.Buffer 68 typ format.PageType 69 70 nvals int32 71 encoding format.Encoding 72 } 73 74 func (p *page) Type() format.PageType { return p.typ } 75 func (p *page) Data() []byte { return p.buf.Bytes() } 76 func (p *page) NumValues() int32 { return p.nvals } 77 func (p *page) Encoding() format.Encoding { return p.encoding } 78 79 // DataPage is the base interface for both DataPageV1 and DataPageV2 of the 80 // parquet spec. 81 type DataPage interface { 82 Page 83 UncompressedSize() int32 84 Statistics() metadata.EncodedStatistics 85 } 86 87 // Create some pools to use for reusing the data page objects themselves so that 88 // we can avoid tight loops that are creating and destroying tons of individual 89 // objects. This combined with a Release function on the pages themselves 90 // which will put them back into the pool yields significant memory reduction 91 // and performance benefits 92 93 var dataPageV1Pool = sync.Pool{ 94 New: func() interface{} { return (*DataPageV1)(nil) }, 95 } 96 97 var dataPageV2Pool = sync.Pool{ 98 New: func() interface{} { return (*DataPageV2)(nil) }, 99 } 100 101 var dictPagePool = sync.Pool{ 102 New: func() interface{} { return (*DictionaryPage)(nil) }, 103 } 104 105 // DataPageV1 represents a DataPage version 1 from the parquet.thrift file 106 type DataPageV1 struct { 107 page 108 109 defLvlEncoding format.Encoding 110 repLvlEncoding format.Encoding 111 uncompressedSize int32 112 statistics metadata.EncodedStatistics 113 } 114 115 // NewDataPageV1 returns a V1 data page with the given buffer as its data and the specified encoding information 116 // 117 // Will utilize objects that have been released back into the data page pool and 118 // re-use them if available as opposed to creating new objects. Calling Release on the 119 // data page object will release it back to the pool for re-use. 120 func NewDataPageV1(buffer *memory.Buffer, num int32, encoding, defEncoding, repEncoding parquet.Encoding, uncompressedSize int32) *DataPageV1 { 121 dp := dataPageV1Pool.Get().(*DataPageV1) 122 if dp == nil { 123 return &DataPageV1{ 124 page: page{buf: buffer, typ: format.PageType_DATA_PAGE, nvals: num, encoding: format.Encoding(encoding)}, 125 defLvlEncoding: format.Encoding(defEncoding), 126 repLvlEncoding: format.Encoding(repEncoding), 127 uncompressedSize: uncompressedSize, 128 } 129 } 130 131 dp.buf, dp.nvals = buffer, num 132 dp.encoding = format.Encoding(encoding) 133 dp.defLvlEncoding, dp.repLvlEncoding = format.Encoding(defEncoding), format.Encoding(repEncoding) 134 dp.statistics.HasMax, dp.statistics.HasMin = false, false 135 dp.statistics.HasNullCount, dp.statistics.HasDistinctCount = false, false 136 dp.uncompressedSize = uncompressedSize 137 return dp 138 } 139 140 // NewDataPageV1WithStats is the same as NewDataPageV1, but also allows adding the stat info into the created page 141 func NewDataPageV1WithStats(buffer *memory.Buffer, num int32, encoding, defEncoding, repEncoding parquet.Encoding, uncompressedSize int32, stats metadata.EncodedStatistics) *DataPageV1 { 142 ret := NewDataPageV1(buffer, num, encoding, defEncoding, repEncoding, uncompressedSize) 143 ret.statistics = stats 144 return ret 145 } 146 147 // Release this page back into the DataPage object pool so that it can be reused. 148 // 149 // After calling this function, the object should not be utilized anymore, otherwise 150 // conflicts can arise. 151 func (d *DataPageV1) Release() { 152 d.buf.Release() 153 d.buf = nil 154 dataPageV1Pool.Put(d) 155 } 156 157 // UncompressedSize returns the size of the data in this data page when uncompressed 158 func (d *DataPageV1) UncompressedSize() int32 { return d.uncompressedSize } 159 160 // Statistics returns the encoded statistics on this data page 161 func (d *DataPageV1) Statistics() metadata.EncodedStatistics { return d.statistics } 162 163 // DefinitionLevelEncoding returns the encoding utilized for the Definition Levels 164 func (d *DataPageV1) DefinitionLevelEncoding() parquet.Encoding { 165 return parquet.Encoding(d.defLvlEncoding) 166 } 167 168 // RepetitionLevelEncoding returns the encoding utilized for the Repetition Levels 169 func (d *DataPageV1) RepetitionLevelEncoding() parquet.Encoding { 170 return parquet.Encoding(d.repLvlEncoding) 171 } 172 173 // DataPageV2 is the representation of the V2 data page from the parquet.thrift spec 174 type DataPageV2 struct { 175 page 176 177 nulls int32 178 nrows int32 179 defLvlByteLen int32 180 repLvlByteLen int32 181 compressed bool 182 uncompressedSize int32 183 statistics metadata.EncodedStatistics 184 } 185 186 // NewDataPageV2 constructs a new V2 data page with the provided information and a buffer of the raw data. 187 func NewDataPageV2(buffer *memory.Buffer, numValues, numNulls, numRows int32, encoding parquet.Encoding, defLvlsByteLen, repLvlsByteLen, uncompressed int32, isCompressed bool) *DataPageV2 { 188 dp := dataPageV2Pool.Get().(*DataPageV2) 189 if dp == nil { 190 return &DataPageV2{ 191 page: page{buf: buffer, typ: format.PageType_DATA_PAGE_V2, nvals: numValues, encoding: format.Encoding(encoding)}, 192 nulls: numNulls, 193 nrows: numRows, 194 defLvlByteLen: defLvlsByteLen, 195 repLvlByteLen: repLvlsByteLen, 196 compressed: isCompressed, 197 uncompressedSize: uncompressed, 198 } 199 } 200 201 dp.buf, dp.nvals = buffer, numValues 202 dp.encoding = format.Encoding(encoding) 203 dp.nulls, dp.nrows = numNulls, numRows 204 dp.defLvlByteLen, dp.repLvlByteLen = defLvlsByteLen, repLvlsByteLen 205 dp.compressed, dp.uncompressedSize = isCompressed, uncompressed 206 dp.statistics.HasMax, dp.statistics.HasMin = false, false 207 dp.statistics.HasNullCount, dp.statistics.HasDistinctCount = false, false 208 return dp 209 } 210 211 // NewDataPageV2WithStats is the same as NewDataPageV2 but allows providing the encoded stats with the page. 212 func NewDataPageV2WithStats(buffer *memory.Buffer, numValues, numNulls, numRows int32, encoding parquet.Encoding, defLvlsByteLen, repLvlsByteLen, uncompressed int32, isCompressed bool, stats metadata.EncodedStatistics) *DataPageV2 { 213 ret := NewDataPageV2(buffer, numValues, numNulls, numRows, encoding, defLvlsByteLen, repLvlsByteLen, uncompressed, isCompressed) 214 ret.statistics = stats 215 return ret 216 } 217 218 // Release this page back into the DataPage object pool so that it can be reused. 219 // 220 // After calling this function, the object should not be utilized anymore, otherwise 221 // conflicts can arise. 222 func (d *DataPageV2) Release() { 223 d.buf.Release() 224 d.buf = nil 225 dataPageV2Pool.Put(d) 226 } 227 228 // UncompressedSize is the size of the raw page when uncompressed. If `IsCompressed` is true, then 229 // the raw data in the buffer is expected to be compressed. 230 func (d *DataPageV2) UncompressedSize() int32 { return d.uncompressedSize } 231 232 // Statistics are the encoded statistics in the data page 233 func (d *DataPageV2) Statistics() metadata.EncodedStatistics { return d.statistics } 234 235 // NumNulls is the reported number of nulls in this datapage 236 func (d *DataPageV2) NumNulls() int32 { return d.nulls } 237 238 // NumRows is the number of rows recorded in the page header 239 func (d *DataPageV2) NumRows() int32 { return d.nrows } 240 241 // DefinitionLevelByteLen is the number of bytes in the buffer that are used to represent the definition levels 242 func (d *DataPageV2) DefinitionLevelByteLen() int32 { return d.defLvlByteLen } 243 244 // RepetitionLevelByteLen is the number of bytes in the buffer which are used to represent the repetition Levels 245 func (d *DataPageV2) RepetitionLevelByteLen() int32 { return d.repLvlByteLen } 246 247 // IsCompressed returns true if the data of this page is compressed 248 func (d *DataPageV2) IsCompressed() bool { return d.compressed } 249 250 // DictionaryPage represents the a page of data that uses dictionary encoding 251 type DictionaryPage struct { 252 page 253 254 sorted bool 255 } 256 257 // NewDictionaryPage constructs a new dictionary page with the provided data buffer and number of values. 258 func NewDictionaryPage(buffer *memory.Buffer, nvals int32, encoding parquet.Encoding) *DictionaryPage { 259 dp := dictPagePool.Get().(*DictionaryPage) 260 if dp == nil { 261 return &DictionaryPage{ 262 page: page{ 263 buf: buffer, 264 typ: format.PageType_DICTIONARY_PAGE, 265 nvals: nvals, 266 encoding: format.Encoding(encoding), 267 }, 268 } 269 } 270 271 dp.buf = buffer 272 dp.nvals = nvals 273 dp.encoding = format.Encoding(encoding) 274 dp.sorted = false 275 return dp 276 } 277 278 // Release this page back into the DataPage object pool so that it can be reused. 279 // 280 // After calling this function, the object should not be utilized anymore, otherwise 281 // conflicts can arise. 282 func (d *DictionaryPage) Release() { 283 d.buf.Release() 284 d.buf = nil 285 dictPagePool.Put(d) 286 } 287 288 // IsSorted returns whether the dictionary itself is sorted 289 func (d *DictionaryPage) IsSorted() bool { return d.sorted } 290 291 type serializedPageReader struct { 292 r ipc.ReadAtSeeker 293 nrows int64 294 rowsSeen int64 295 mem memory.Allocator 296 codec compress.Codec 297 298 curPageHdr *format.PageHeader 299 buf *memory.Buffer 300 pageOrd int16 301 maxPageHeaderSize int 302 303 curPage Page 304 cryptoCtx CryptoContext 305 dataPageAad string 306 dataPageHeaderAad string 307 308 decompressBuffer bytes.Buffer 309 err error 310 } 311 312 // NewPageReader returns a page reader for the data which can be read from the provided reader and compression. 313 func NewPageReader(r parquet.ReaderAtSeeker, nrows int64, compressType compress.Compression, mem memory.Allocator, ctx *CryptoContext) (PageReader, error) { 314 if mem == nil { 315 mem = memory.NewGoAllocator() 316 } 317 318 codec, err := compress.GetCodec(compressType) 319 if err != nil { 320 return nil, err 321 } 322 323 rdr := &serializedPageReader{ 324 r: r, 325 maxPageHeaderSize: defaultMaxPageHeaderSize, 326 nrows: nrows, 327 mem: mem, 328 codec: codec, 329 buf: memory.NewResizableBuffer(mem), 330 } 331 rdr.decompressBuffer.Grow(defaultPageHeaderSize) 332 if ctx != nil { 333 rdr.cryptoCtx = *ctx 334 rdr.initDecryption() 335 } 336 return rdr, nil 337 } 338 339 func (p *serializedPageReader) Reset(r parquet.ReaderAtSeeker, nrows int64, compressType compress.Compression, ctx *CryptoContext) { 340 p.rowsSeen, p.pageOrd = 0, 0 341 p.curPageHdr, p.curPage, p.err = nil, nil, nil 342 p.r, p.nrows = r, nrows 343 344 p.codec, p.err = compress.GetCodec(compressType) 345 if p.err != nil { 346 return 347 } 348 p.buf.ResizeNoShrink(0) 349 p.decompressBuffer.Reset() 350 if ctx != nil { 351 p.cryptoCtx = *ctx 352 p.initDecryption() 353 } else { 354 p.cryptoCtx = CryptoContext{} 355 p.dataPageAad = "" 356 p.dataPageHeaderAad = "" 357 } 358 } 359 360 func (p *serializedPageReader) Err() error { return p.err } 361 362 func (p *serializedPageReader) SetMaxPageHeaderSize(sz int) { 363 p.maxPageHeaderSize = sz 364 } 365 366 func (p *serializedPageReader) initDecryption() { 367 if p.cryptoCtx.DataDecryptor != nil { 368 p.dataPageAad = encryption.CreateModuleAad(p.cryptoCtx.DataDecryptor.FileAad(), encryption.DataPageModule, 369 p.cryptoCtx.RowGroupOrdinal, p.cryptoCtx.ColumnOrdinal, -1) 370 } 371 if p.cryptoCtx.MetaDecryptor != nil { 372 p.dataPageHeaderAad = encryption.CreateModuleAad(p.cryptoCtx.MetaDecryptor.FileAad(), encryption.DataPageHeaderModule, 373 p.cryptoCtx.RowGroupOrdinal, p.cryptoCtx.ColumnOrdinal, -1) 374 } 375 } 376 377 func (p *serializedPageReader) updateDecryption(decrypt encryption.Decryptor, moduleType int8, pageAad string) { 378 if p.cryptoCtx.StartDecryptWithDictionaryPage { 379 aad := encryption.CreateModuleAad(decrypt.FileAad(), moduleType, p.cryptoCtx.RowGroupOrdinal, p.cryptoCtx.ColumnOrdinal, -1) 380 decrypt.UpdateAad(aad) 381 } else { 382 pageaad := []byte(pageAad) 383 encryption.QuickUpdatePageAad(pageaad, p.pageOrd) 384 decrypt.UpdateAad(string(pageaad)) 385 } 386 } 387 388 func (p *serializedPageReader) Page() Page { 389 return p.curPage 390 } 391 392 func (p *serializedPageReader) decompress(lenCompressed int, buf []byte) ([]byte, error) { 393 p.decompressBuffer.Reset() 394 p.decompressBuffer.Grow(lenCompressed) 395 if _, err := io.CopyN(&p.decompressBuffer, p.r, int64(lenCompressed)); err != nil { 396 return nil, err 397 } 398 399 data := p.decompressBuffer.Bytes() 400 if p.cryptoCtx.DataDecryptor != nil { 401 data = p.cryptoCtx.DataDecryptor.Decrypt(p.decompressBuffer.Bytes()) 402 } 403 404 return p.codec.Decode(buf, data), nil 405 } 406 407 type dataheader interface { 408 IsSetStatistics() bool 409 GetStatistics() *format.Statistics 410 } 411 412 func extractStats(dataHeader dataheader) (pageStats metadata.EncodedStatistics) { 413 if dataHeader.IsSetStatistics() { 414 stats := dataHeader.GetStatistics() 415 if stats.IsSetMaxValue() { 416 pageStats.SetMax(stats.GetMaxValue()) 417 } else if stats.IsSetMax() { 418 pageStats.SetMax(stats.GetMax()) 419 } 420 if stats.IsSetMinValue() { 421 pageStats.SetMin(stats.GetMinValue()) 422 } else if stats.IsSetMin() { 423 pageStats.SetMin(stats.GetMin()) 424 } 425 426 if stats.IsSetNullCount() { 427 pageStats.SetNullCount(stats.GetNullCount()) 428 } 429 if stats.IsSetDistinctCount() { 430 pageStats.SetDistinctCount(stats.GetDistinctCount()) 431 } 432 } 433 return 434 } 435 436 func (p *serializedPageReader) Next() bool { 437 // Loop here because there may be unhandled page types that we skip until 438 // finding a page that we do know what to do with 439 if p.curPage != nil { 440 p.curPage.Release() 441 } 442 p.curPage = nil 443 p.curPageHdr = format.NewPageHeader() 444 p.err = nil 445 446 for p.rowsSeen < p.nrows { 447 // headerSize := 0 448 allowedPgSz := defaultPageHeaderSize 449 450 start, _ := p.r.Seek(0, io.SeekCurrent) 451 p.decompressBuffer.Reset() 452 // Page headers can be very large because of page statistics 453 // We try to deserialize a larger buffer progressively 454 // until a maximum allowed header limit 455 for { 456 n, err := io.CopyN(&p.decompressBuffer, p.r, int64(allowedPgSz)) 457 // view, err := p.r.Peek(allowedPgSz) 458 if err != nil && err != io.EOF { 459 p.err = err 460 return false 461 } 462 463 if n == 0 { 464 return false 465 } 466 467 view := p.decompressBuffer.Bytes() 468 469 extra := 0 470 if p.cryptoCtx.MetaDecryptor != nil { 471 p.updateDecryption(p.cryptoCtx.MetaDecryptor, encryption.DictPageHeaderModule, p.dataPageHeaderAad) 472 view = p.cryptoCtx.MetaDecryptor.Decrypt(view) 473 extra = p.cryptoCtx.MetaDecryptor.CiphertextSizeDelta() 474 } 475 476 remaining, err := thrift.DeserializeThrift(p.curPageHdr, view) 477 if err != nil { 478 allowedPgSz *= 2 479 if allowedPgSz > p.maxPageHeaderSize { 480 p.err = xerrors.New("parquet: deserializing page header failed") 481 return false 482 } 483 continue 484 } 485 486 p.r.Seek(start+int64(len(view)-int(remaining)+extra), io.SeekStart) 487 break 488 } 489 490 lenCompressed := int(p.curPageHdr.GetCompressedPageSize()) 491 lenUncompressed := int(p.curPageHdr.GetUncompressedPageSize()) 492 if lenCompressed < 0 || lenUncompressed < 0 { 493 p.err = xerrors.New("parquet: invalid page header") 494 return false 495 } 496 497 if p.cryptoCtx.DataDecryptor != nil { 498 p.updateDecryption(p.cryptoCtx.DataDecryptor, encryption.DictPageModule, p.dataPageAad) 499 } 500 501 p.buf.ResizeNoShrink(lenUncompressed) 502 503 switch p.curPageHdr.GetType() { 504 case format.PageType_DICTIONARY_PAGE: 505 p.cryptoCtx.StartDecryptWithDictionaryPage = false 506 dictHeader := p.curPageHdr.GetDictionaryPageHeader() 507 if dictHeader.GetNumValues() < 0 { 508 p.err = xerrors.New("parquet: invalid page header (negative number of values)") 509 return false 510 } 511 512 data, err := p.decompress(lenCompressed, p.buf.Bytes()) 513 if err != nil { 514 p.err = err 515 return false 516 } 517 if len(data) != lenUncompressed { 518 p.err = xerrors.Errorf("parquet: metadata said %d bytes uncompressed dictionary page, got %d bytes", lenUncompressed, len(data)) 519 return false 520 } 521 522 // p.buf.Resize(lenUncompressed) 523 // make dictionary page 524 p.curPage = &DictionaryPage{ 525 page: page{ 526 buf: memory.NewBufferBytes(data), 527 typ: p.curPageHdr.Type, 528 nvals: dictHeader.GetNumValues(), 529 encoding: dictHeader.GetEncoding(), 530 }, 531 sorted: dictHeader.IsSetIsSorted() && dictHeader.GetIsSorted(), 532 } 533 534 case format.PageType_DATA_PAGE: 535 p.pageOrd++ 536 dataHeader := p.curPageHdr.GetDataPageHeader() 537 if dataHeader.GetNumValues() < 0 { 538 p.err = xerrors.New("parquet: invalid page header (negative number of values)") 539 return false 540 } 541 542 p.rowsSeen += int64(dataHeader.GetNumValues()) 543 data, err := p.decompress(lenCompressed, p.buf.Bytes()) 544 if err != nil { 545 p.err = err 546 return false 547 } 548 if len(data) != lenUncompressed { 549 p.err = xerrors.Errorf("parquet: metadata said %d bytes uncompressed data page, got %d bytes", lenUncompressed, len(data)) 550 return false 551 } 552 553 // make datapagev1 554 p.curPage = &DataPageV1{ 555 page: page{ 556 buf: memory.NewBufferBytes(data), 557 typ: p.curPageHdr.Type, 558 nvals: dataHeader.GetNumValues(), 559 encoding: dataHeader.GetEncoding(), 560 }, 561 defLvlEncoding: dataHeader.GetDefinitionLevelEncoding(), 562 repLvlEncoding: dataHeader.GetRepetitionLevelEncoding(), 563 uncompressedSize: int32(lenUncompressed), 564 statistics: extractStats(dataHeader), 565 } 566 case format.PageType_DATA_PAGE_V2: 567 p.pageOrd++ 568 dataHeader := p.curPageHdr.GetDataPageHeaderV2() 569 if dataHeader.GetNumValues() < 0 { 570 p.err = xerrors.New("parquet: invalid page header (negative number of values)") 571 return false 572 } 573 574 if dataHeader.GetDefinitionLevelsByteLength() < 0 || dataHeader.GetRepetitionLevelsByteLength() < 0 { 575 p.err = xerrors.New("parquet: invalid page header (negative levels byte length)") 576 return false 577 } 578 579 compressed := dataHeader.GetIsCompressed() 580 // extract stats 581 p.rowsSeen += int64(dataHeader.GetNumValues()) 582 levelsBytelen, ok := overflow.Add(int(dataHeader.GetDefinitionLevelsByteLength()), int(dataHeader.GetRepetitionLevelsByteLength())) 583 if !ok { 584 p.err = xerrors.New("parquet: levels size too large (corrupt file?)") 585 return false 586 } 587 588 var data []byte 589 if compressed { 590 if levelsBytelen > 0 { 591 io.ReadFull(p.r, p.buf.Bytes()[:levelsBytelen]) 592 } 593 if data, p.err = p.decompress(lenCompressed-levelsBytelen, p.buf.Bytes()[levelsBytelen:]); p.err != nil { 594 return false 595 } 596 } else { 597 io.ReadFull(p.r, p.buf.Bytes()) 598 data = p.buf.Bytes() 599 } 600 if len(data) != lenUncompressed { 601 p.err = xerrors.Errorf("parquet: metadata said %d bytes uncompressed data page, got %d bytes", lenUncompressed, len(data)) 602 return false 603 } 604 605 // make datapage v2 606 p.curPage = &DataPageV2{ 607 page: page{ 608 buf: memory.NewBufferBytes(data), 609 typ: p.curPageHdr.Type, 610 nvals: dataHeader.GetNumValues(), 611 encoding: dataHeader.GetEncoding(), 612 }, 613 nulls: dataHeader.GetNumNulls(), 614 nrows: dataHeader.GetNumRows(), 615 defLvlByteLen: dataHeader.GetDefinitionLevelsByteLength(), 616 repLvlByteLen: dataHeader.GetRepetitionLevelsByteLength(), 617 compressed: compressed, 618 uncompressedSize: int32(lenUncompressed), 619 statistics: extractStats(dataHeader), 620 } 621 default: 622 // we don't know this page type, we're allowed to skip non-data pages 623 continue 624 } 625 626 p.buf = memory.NewResizableBuffer(p.mem) 627 return true 628 } 629 630 return false 631 }