// github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/page.go

package parquet

import (
	"bytes"
	"fmt"
	"io"

	"github.com/parquet-go/parquet-go/deprecated"
	"github.com/parquet-go/parquet-go/encoding"
	"github.com/parquet-go/parquet-go/internal/bitpack"
	"github.com/parquet-go/parquet-go/internal/debug"
)

// Page values represent sequences of parquet values. From the Parquet
// documentation: "Column chunks are a chunk of the data for a particular
// column. They live in a particular row group and are guaranteed to be
// contiguous in the file. Column chunks are divided up into pages. A page is
// conceptually an indivisible unit (in terms of compression and encoding).
// There can be multiple page types which are interleaved in a column chunk."
//
// https://github.com/apache/parquet-format#glossary
type Page interface {
	// Type returns the type of values read from this page.
	//
	// The returned type can be used to encode the page data. In the case of
	// an indexed page (which has a dictionary), the type is configured to
	// encode the indexes stored in the page rather than the plain values.
	Type() Type

	// Column returns the index of the column that this page belongs to.
	Column() int

	// Dictionary returns the dictionary in which the values are looked up
	// when the page contains indexed values. Otherwise, the method returns
	// nil.
	Dictionary() Dictionary

	// NumRows, NumValues, and NumNulls return the number of rows, values, and
	// nulls in the page. The number of rows may be less than the number of
	// values in the page if the page is part of a repeated column.
	NumRows() int64
	NumValues() int64
	NumNulls() int64

	// Bounds returns the page's min and max values.
	//
	// The third value is a boolean indicating whether the page bounds were
	// available. Page bounds may not be known if the page contained no values
	// or only nulls, or if they were read from a parquet file which had
	// neither page statistics nor a page index.
	Bounds() (min, max Value, ok bool)

	// Size returns the size of the page in bytes (uncompressed).
	Size() int64

	// Values returns a reader exposing the values contained in the page.
	//
	// Depending on the underlying implementation, the returned reader may
	// support reading an array of typed Go values by implementing interfaces
	// like parquet.Int32Reader. Applications should use type assertions on
	// the returned reader to determine whether those optimizations are
	// available.
	Values() ValueReader

	// Slice returns a new page which is a slice of the receiver between row
	// indexes i and j.
	Slice(i, j int64) Page

	// RepetitionLevels and DefinitionLevels expose the lists of repetition
	// and definition levels of the page.
	//
	// The returned slices may be empty when the page has no repetition or
	// definition levels.
	RepetitionLevels() []byte
	DefinitionLevels() []byte

	// Data returns the in-memory buffer holding the page values.
	//
	// The intent is for the returned value to be used as input parameter when
	// calling the Encode method of the associated Type.
	//
	// The slices referenced by the encoding.Values may be the same across
	// multiple calls to this method; applications must treat the content as
	// immutable.
	Data() encoding.Values
}

// PageReader is an interface implemented by types that support producing a
// sequence of pages.
type PageReader interface {
	// ReadPage reads and returns the next page from the sequence. When all
	// pages have been read, or if the sequence was closed, the method returns
	// io.EOF.
	ReadPage() (Page, error)
}

// PageWriter is an interface implemented by types that support writing pages
// to an underlying storage medium.
97 type PageWriter interface { 98 WritePage(Page) (int64, error) 99 } 100 101 // Pages is an interface implemented by page readers returned by calling the 102 // Pages method of ColumnChunk instances. 103 type Pages interface { 104 PageReader 105 RowSeeker 106 io.Closer 107 } 108 109 // AsyncPages wraps the given Pages instance to perform page reads 110 // asynchronously in a separate goroutine. 111 // 112 // Performing page reads asynchronously is important when the application may 113 // be reading pages from a high latency backend, and the last 114 // page read may be processed while initiating reading of the next page. 115 func AsyncPages(pages Pages) Pages { 116 p := new(asyncPages) 117 p.init(pages, nil) 118 // If the pages object gets garbage collected without Close being called, 119 // this finalizer would ensure that the goroutine is stopped and doesn't 120 // leak. 121 debug.SetFinalizer(p, func(p *asyncPages) { p.Close() }) 122 return p 123 } 124 125 type asyncPages struct { 126 read <-chan asyncPage 127 seek chan<- int64 128 done chan<- struct{} 129 version int64 130 } 131 132 type asyncPage struct { 133 page Page 134 err error 135 version int64 136 } 137 138 func (pages *asyncPages) init(base Pages, done chan struct{}) { 139 read := make(chan asyncPage) 140 seek := make(chan int64, 1) 141 142 pages.read = read 143 pages.seek = seek 144 145 if done == nil { 146 done = make(chan struct{}) 147 pages.done = done 148 } 149 150 go readPages(base, read, seek, done) 151 } 152 153 func (pages *asyncPages) Close() (err error) { 154 if pages.done != nil { 155 close(pages.done) 156 pages.done = nil 157 } 158 for p := range pages.read { 159 // Capture the last error, which is the value returned from closing the 160 // underlying Pages instance. 
161 err = p.err 162 } 163 pages.seek = nil 164 return err 165 } 166 167 func (pages *asyncPages) ReadPage() (Page, error) { 168 for { 169 p, ok := <-pages.read 170 if !ok { 171 return nil, io.EOF 172 } 173 // Because calls to SeekToRow might be made concurrently to reading 174 // pages, it is possible for ReadPage to see pages that were read before 175 // the last SeekToRow call. 176 // 177 // A version number is attached to each page read asynchronously to 178 // discard outdated pages and ensure that we maintain a consistent view 179 // of the sequence of pages read. 180 if p.version == pages.version { 181 return p.page, p.err 182 } 183 } 184 } 185 186 func (pages *asyncPages) SeekToRow(rowIndex int64) error { 187 if pages.seek == nil { 188 return io.ErrClosedPipe 189 } 190 // The seek channel has a capacity of 1 to allow the first SeekToRow call to 191 // be non-blocking. 192 // 193 // If SeekToRow calls are performed faster than they can be handled by the 194 // goroutine reading pages, this path might become a contention point. 
195 pages.seek <- rowIndex 196 pages.version++ 197 return nil 198 } 199 200 func readPages(pages Pages, read chan<- asyncPage, seek <-chan int64, done <-chan struct{}) { 201 defer func() { 202 read <- asyncPage{err: pages.Close(), version: -1} 203 close(read) 204 }() 205 206 version := int64(0) 207 for { 208 page, err := pages.ReadPage() 209 210 for { 211 select { 212 case <-done: 213 return 214 case read <- asyncPage{ 215 page: page, 216 err: err, 217 version: version, 218 }: 219 case rowIndex := <-seek: 220 version++ 221 err = pages.SeekToRow(rowIndex) 222 } 223 if err == nil { 224 break 225 } 226 } 227 } 228 } 229 230 type singlePage struct { 231 page Page 232 seek int64 233 numRows int64 234 } 235 236 func (r *singlePage) ReadPage() (Page, error) { 237 if r.page != nil { 238 if r.seek < r.numRows { 239 seek := r.seek 240 r.seek = r.numRows 241 if seek > 0 { 242 return r.page.Slice(seek, r.numRows), nil 243 } 244 return r.page, nil 245 } 246 } 247 return nil, io.EOF 248 } 249 250 func (r *singlePage) SeekToRow(rowIndex int64) error { 251 r.seek = rowIndex 252 return nil 253 } 254 255 func (r *singlePage) Close() error { 256 r.page = nil 257 r.seek = 0 258 return nil 259 } 260 261 func onePage(page Page) Pages { 262 return &singlePage{page: page, numRows: page.NumRows()} 263 } 264 265 // CopyPages copies pages from src to dst, returning the number of values that 266 // were copied. 267 // 268 // The function returns any error it encounters reading or writing pages, except 269 // for io.EOF from the reader which indicates that there were no more pages to 270 // read. 
271 func CopyPages(dst PageWriter, src PageReader) (numValues int64, err error) { 272 for { 273 p, err := src.ReadPage() 274 if err != nil { 275 if err == io.EOF { 276 err = nil 277 } 278 return numValues, err 279 } 280 n, err := dst.WritePage(p) 281 numValues += n 282 if err != nil { 283 return numValues, err 284 } 285 } 286 } 287 288 // errorPage is an implementation of the Page interface which always errors when 289 // attempting to read its values. 290 // 291 // The error page declares that it contains one value (even if it does not) 292 // as a way to ensure that it is not ignored due to being empty when written 293 // to a file. 294 type errorPage struct { 295 typ Type 296 err error 297 columnIndex int 298 } 299 300 func newErrorPage(typ Type, columnIndex int, msg string, args ...interface{}) *errorPage { 301 return &errorPage{ 302 typ: typ, 303 err: fmt.Errorf(msg, args...), 304 columnIndex: columnIndex, 305 } 306 } 307 308 func (page *errorPage) Type() Type { return page.typ } 309 func (page *errorPage) Column() int { return page.columnIndex } 310 func (page *errorPage) Dictionary() Dictionary { return nil } 311 func (page *errorPage) NumRows() int64 { return 1 } 312 func (page *errorPage) NumValues() int64 { return 1 } 313 func (page *errorPage) NumNulls() int64 { return 0 } 314 func (page *errorPage) Bounds() (min, max Value, ok bool) { return } 315 func (page *errorPage) Slice(i, j int64) Page { return page } 316 func (page *errorPage) Size() int64 { return 1 } 317 func (page *errorPage) RepetitionLevels() []byte { return nil } 318 func (page *errorPage) DefinitionLevels() []byte { return nil } 319 func (page *errorPage) Data() encoding.Values { return encoding.Values{} } 320 func (page *errorPage) Values() ValueReader { return errorPageValues{page: page} } 321 322 type errorPageValues struct{ page *errorPage } 323 324 func (r errorPageValues) ReadValues([]Value) (int, error) { return 0, r.page.err } 325 func (r errorPageValues) Close() error { return 
nil } 326 327 func errPageBoundsOutOfRange(i, j, n int64) error { 328 return fmt.Errorf("page bounds out of range [%d:%d]: with length %d", i, j, n) 329 } 330 331 type optionalPage struct { 332 base Page 333 maxDefinitionLevel byte 334 definitionLevels []byte 335 } 336 337 func newOptionalPage(base Page, maxDefinitionLevel byte, definitionLevels []byte) *optionalPage { 338 return &optionalPage{ 339 base: base, 340 maxDefinitionLevel: maxDefinitionLevel, 341 definitionLevels: definitionLevels, 342 } 343 } 344 345 func (page *optionalPage) Type() Type { return page.base.Type() } 346 347 func (page *optionalPage) Column() int { return page.base.Column() } 348 349 func (page *optionalPage) Dictionary() Dictionary { return page.base.Dictionary() } 350 351 func (page *optionalPage) NumRows() int64 { return int64(len(page.definitionLevels)) } 352 353 func (page *optionalPage) NumValues() int64 { return int64(len(page.definitionLevels)) } 354 355 func (page *optionalPage) NumNulls() int64 { 356 return int64(countLevelsNotEqual(page.definitionLevels, page.maxDefinitionLevel)) 357 } 358 359 func (page *optionalPage) Bounds() (min, max Value, ok bool) { return page.base.Bounds() } 360 361 func (page *optionalPage) Size() int64 { return int64(len(page.definitionLevels)) + page.base.Size() } 362 363 func (page *optionalPage) RepetitionLevels() []byte { return nil } 364 365 func (page *optionalPage) DefinitionLevels() []byte { return page.definitionLevels } 366 367 func (page *optionalPage) Data() encoding.Values { return page.base.Data() } 368 369 func (page *optionalPage) Values() ValueReader { 370 return &optionalPageValues{ 371 page: page, 372 values: page.base.Values(), 373 } 374 } 375 376 func (page *optionalPage) Slice(i, j int64) Page { 377 maxDefinitionLevel := page.maxDefinitionLevel 378 definitionLevels := page.definitionLevels 379 numNulls1 := int64(countLevelsNotEqual(definitionLevels[:i], maxDefinitionLevel)) 380 numNulls2 := 
int64(countLevelsNotEqual(definitionLevels[i:j], maxDefinitionLevel)) 381 return newOptionalPage( 382 page.base.Slice(i-numNulls1, j-(numNulls1+numNulls2)), 383 maxDefinitionLevel, 384 definitionLevels[i:j:j], 385 ) 386 } 387 388 type repeatedPage struct { 389 base Page 390 maxRepetitionLevel byte 391 maxDefinitionLevel byte 392 definitionLevels []byte 393 repetitionLevels []byte 394 } 395 396 func newRepeatedPage(base Page, maxRepetitionLevel, maxDefinitionLevel byte, repetitionLevels, definitionLevels []byte) *repeatedPage { 397 return &repeatedPage{ 398 base: base, 399 maxRepetitionLevel: maxRepetitionLevel, 400 maxDefinitionLevel: maxDefinitionLevel, 401 definitionLevels: definitionLevels, 402 repetitionLevels: repetitionLevels, 403 } 404 } 405 406 func (page *repeatedPage) Type() Type { return page.base.Type() } 407 408 func (page *repeatedPage) Column() int { return page.base.Column() } 409 410 func (page *repeatedPage) Dictionary() Dictionary { return page.base.Dictionary() } 411 412 func (page *repeatedPage) NumRows() int64 { return int64(countLevelsEqual(page.repetitionLevels, 0)) } 413 414 func (page *repeatedPage) NumValues() int64 { return int64(len(page.definitionLevels)) } 415 416 func (page *repeatedPage) NumNulls() int64 { 417 return int64(countLevelsNotEqual(page.definitionLevels, page.maxDefinitionLevel)) 418 } 419 420 func (page *repeatedPage) Bounds() (min, max Value, ok bool) { return page.base.Bounds() } 421 422 func (page *repeatedPage) Size() int64 { 423 return int64(len(page.repetitionLevels)) + int64(len(page.definitionLevels)) + page.base.Size() 424 } 425 426 func (page *repeatedPage) RepetitionLevels() []byte { return page.repetitionLevels } 427 428 func (page *repeatedPage) DefinitionLevels() []byte { return page.definitionLevels } 429 430 func (page *repeatedPage) Data() encoding.Values { return page.base.Data() } 431 432 func (page *repeatedPage) Values() ValueReader { 433 return &repeatedPageValues{ 434 page: page, 435 values: 
page.base.Values(), 436 } 437 } 438 439 func (page *repeatedPage) Slice(i, j int64) Page { 440 numRows := page.NumRows() 441 if i < 0 || i > numRows { 442 panic(errPageBoundsOutOfRange(i, j, numRows)) 443 } 444 if j < 0 || j > numRows { 445 panic(errPageBoundsOutOfRange(i, j, numRows)) 446 } 447 if i > j { 448 panic(errPageBoundsOutOfRange(i, j, numRows)) 449 } 450 451 maxRepetitionLevel := page.maxRepetitionLevel 452 maxDefinitionLevel := page.maxDefinitionLevel 453 repetitionLevels := page.repetitionLevels 454 definitionLevels := page.definitionLevels 455 456 rowIndex0 := 0 457 rowIndex1 := len(repetitionLevels) 458 rowIndex2 := len(repetitionLevels) 459 460 for k, def := range repetitionLevels { 461 if def == 0 { 462 if rowIndex0 == int(i) { 463 rowIndex1 = k 464 break 465 } 466 rowIndex0++ 467 } 468 } 469 470 for k, def := range repetitionLevels[rowIndex1:] { 471 if def == 0 { 472 if rowIndex0 == int(j) { 473 rowIndex2 = rowIndex1 + k 474 break 475 } 476 rowIndex0++ 477 } 478 } 479 480 numNulls1 := countLevelsNotEqual(definitionLevels[:rowIndex1], maxDefinitionLevel) 481 numNulls2 := countLevelsNotEqual(definitionLevels[rowIndex1:rowIndex2], maxDefinitionLevel) 482 483 i = int64(rowIndex1 - numNulls1) 484 j = int64(rowIndex2 - (numNulls1 + numNulls2)) 485 486 return newRepeatedPage( 487 page.base.Slice(i, j), 488 maxRepetitionLevel, 489 maxDefinitionLevel, 490 repetitionLevels[rowIndex1:rowIndex2:rowIndex2], 491 definitionLevels[rowIndex1:rowIndex2:rowIndex2], 492 ) 493 } 494 495 type booleanPage struct { 496 typ Type 497 bits []byte 498 offset int32 499 numValues int32 500 columnIndex int16 501 } 502 503 func newBooleanPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *booleanPage { 504 return &booleanPage{ 505 typ: typ, 506 bits: values.Boolean()[:bitpack.ByteCount(uint(numValues))], 507 numValues: numValues, 508 columnIndex: ^columnIndex, 509 } 510 } 511 512 func (page *booleanPage) Type() Type { return page.typ } 513 514 func (page 
*booleanPage) Column() int { return int(^page.columnIndex) } 515 516 func (page *booleanPage) Dictionary() Dictionary { return nil } 517 518 func (page *booleanPage) NumRows() int64 { return int64(page.numValues) } 519 520 func (page *booleanPage) NumValues() int64 { return int64(page.numValues) } 521 522 func (page *booleanPage) NumNulls() int64 { return 0 } 523 524 func (page *booleanPage) Size() int64 { return int64(len(page.bits)) } 525 526 func (page *booleanPage) RepetitionLevels() []byte { return nil } 527 528 func (page *booleanPage) DefinitionLevels() []byte { return nil } 529 530 func (page *booleanPage) Data() encoding.Values { return encoding.BooleanValues(page.bits) } 531 532 func (page *booleanPage) Values() ValueReader { return &booleanPageValues{page: page} } 533 534 func (page *booleanPage) valueAt(i int) bool { 535 j := uint32(int(page.offset)+i) / 8 536 k := uint32(int(page.offset)+i) % 8 537 return ((page.bits[j] >> k) & 1) != 0 538 } 539 540 func (page *booleanPage) min() bool { 541 for i := 0; i < int(page.numValues); i++ { 542 if !page.valueAt(i) { 543 return false 544 } 545 } 546 return page.numValues > 0 547 } 548 549 func (page *booleanPage) max() bool { 550 for i := 0; i < int(page.numValues); i++ { 551 if page.valueAt(i) { 552 return true 553 } 554 } 555 return false 556 } 557 558 func (page *booleanPage) bounds() (min, max bool) { 559 hasFalse, hasTrue := false, false 560 561 for i := 0; i < int(page.numValues); i++ { 562 v := page.valueAt(i) 563 if v { 564 hasTrue = true 565 } else { 566 hasFalse = true 567 } 568 if hasTrue && hasFalse { 569 break 570 } 571 } 572 573 min = !hasFalse 574 max = hasTrue 575 return min, max 576 } 577 578 func (page *booleanPage) Bounds() (min, max Value, ok bool) { 579 if ok = page.numValues > 0; ok { 580 minBool, maxBool := page.bounds() 581 min = page.makeValue(minBool) 582 max = page.makeValue(maxBool) 583 } 584 return min, max, ok 585 } 586 587 func (page *booleanPage) Slice(i, j int64) Page { 588 
lowWithOffset := i + int64(page.offset) 589 highWithOffset := j + int64(page.offset) 590 591 off := lowWithOffset / 8 592 end := highWithOffset / 8 593 594 if (highWithOffset % 8) != 0 { 595 end++ 596 } 597 598 return &booleanPage{ 599 typ: page.typ, 600 bits: page.bits[off:end], 601 offset: int32(lowWithOffset % 8), 602 numValues: int32(j - i), 603 columnIndex: page.columnIndex, 604 } 605 } 606 607 func (page *booleanPage) makeValue(v bool) Value { 608 value := makeValueBoolean(v) 609 value.columnIndex = page.columnIndex 610 return value 611 } 612 613 type int32Page struct { 614 typ Type 615 values []int32 616 columnIndex int16 617 } 618 619 func newInt32Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *int32Page { 620 return &int32Page{ 621 typ: typ, 622 values: values.Int32()[:numValues], 623 columnIndex: ^columnIndex, 624 } 625 } 626 627 func (page *int32Page) Type() Type { return page.typ } 628 629 func (page *int32Page) Column() int { return int(^page.columnIndex) } 630 631 func (page *int32Page) Dictionary() Dictionary { return nil } 632 633 func (page *int32Page) NumRows() int64 { return int64(len(page.values)) } 634 635 func (page *int32Page) NumValues() int64 { return int64(len(page.values)) } 636 637 func (page *int32Page) NumNulls() int64 { return 0 } 638 639 func (page *int32Page) Size() int64 { return 4 * int64(len(page.values)) } 640 641 func (page *int32Page) RepetitionLevels() []byte { return nil } 642 643 func (page *int32Page) DefinitionLevels() []byte { return nil } 644 645 func (page *int32Page) Data() encoding.Values { return encoding.Int32Values(page.values) } 646 647 func (page *int32Page) Values() ValueReader { return &int32PageValues{page: page} } 648 649 func (page *int32Page) min() int32 { return minInt32(page.values) } 650 651 func (page *int32Page) max() int32 { return maxInt32(page.values) } 652 653 func (page *int32Page) bounds() (min, max int32) { return boundsInt32(page.values) } 654 655 func (page 
*int32Page) Bounds() (min, max Value, ok bool) { 656 if ok = len(page.values) > 0; ok { 657 minInt32, maxInt32 := page.bounds() 658 min = page.makeValue(minInt32) 659 max = page.makeValue(maxInt32) 660 } 661 return min, max, ok 662 } 663 664 func (page *int32Page) Slice(i, j int64) Page { 665 return &int32Page{ 666 typ: page.typ, 667 values: page.values[i:j], 668 columnIndex: page.columnIndex, 669 } 670 } 671 672 func (page *int32Page) makeValue(v int32) Value { 673 value := makeValueInt32(v) 674 value.columnIndex = page.columnIndex 675 return value 676 } 677 678 type int64Page struct { 679 typ Type 680 values []int64 681 columnIndex int16 682 } 683 684 func newInt64Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *int64Page { 685 return &int64Page{ 686 typ: typ, 687 values: values.Int64()[:numValues], 688 columnIndex: ^columnIndex, 689 } 690 } 691 692 func (page *int64Page) Type() Type { return page.typ } 693 694 func (page *int64Page) Column() int { return int(^page.columnIndex) } 695 696 func (page *int64Page) Dictionary() Dictionary { return nil } 697 698 func (page *int64Page) NumRows() int64 { return int64(len(page.values)) } 699 700 func (page *int64Page) NumValues() int64 { return int64(len(page.values)) } 701 702 func (page *int64Page) NumNulls() int64 { return 0 } 703 704 func (page *int64Page) Size() int64 { return 8 * int64(len(page.values)) } 705 706 func (page *int64Page) RepetitionLevels() []byte { return nil } 707 708 func (page *int64Page) DefinitionLevels() []byte { return nil } 709 710 func (page *int64Page) Data() encoding.Values { return encoding.Int64Values(page.values) } 711 712 func (page *int64Page) Values() ValueReader { return &int64PageValues{page: page} } 713 714 func (page *int64Page) min() int64 { return minInt64(page.values) } 715 716 func (page *int64Page) max() int64 { return maxInt64(page.values) } 717 718 func (page *int64Page) bounds() (min, max int64) { return boundsInt64(page.values) } 719 720 func 
(page *int64Page) Bounds() (min, max Value, ok bool) { 721 if ok = len(page.values) > 0; ok { 722 minInt64, maxInt64 := page.bounds() 723 min = page.makeValue(minInt64) 724 max = page.makeValue(maxInt64) 725 } 726 return min, max, ok 727 } 728 729 func (page *int64Page) Slice(i, j int64) Page { 730 return &int64Page{ 731 typ: page.typ, 732 values: page.values[i:j], 733 columnIndex: page.columnIndex, 734 } 735 } 736 737 func (page *int64Page) makeValue(v int64) Value { 738 value := makeValueInt64(v) 739 value.columnIndex = page.columnIndex 740 return value 741 } 742 743 type int96Page struct { 744 typ Type 745 values []deprecated.Int96 746 columnIndex int16 747 } 748 749 func newInt96Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *int96Page { 750 return &int96Page{ 751 typ: typ, 752 values: values.Int96()[:numValues], 753 columnIndex: ^columnIndex, 754 } 755 } 756 757 func (page *int96Page) Type() Type { return page.typ } 758 759 func (page *int96Page) Column() int { return int(^page.columnIndex) } 760 761 func (page *int96Page) Dictionary() Dictionary { return nil } 762 763 func (page *int96Page) NumRows() int64 { return int64(len(page.values)) } 764 765 func (page *int96Page) NumValues() int64 { return int64(len(page.values)) } 766 767 func (page *int96Page) NumNulls() int64 { return 0 } 768 769 func (page *int96Page) Size() int64 { return 12 * int64(len(page.values)) } 770 771 func (page *int96Page) RepetitionLevels() []byte { return nil } 772 773 func (page *int96Page) DefinitionLevels() []byte { return nil } 774 775 func (page *int96Page) Data() encoding.Values { return encoding.Int96Values(page.values) } 776 777 func (page *int96Page) Values() ValueReader { return &int96PageValues{page: page} } 778 779 func (page *int96Page) min() deprecated.Int96 { return deprecated.MinInt96(page.values) } 780 781 func (page *int96Page) max() deprecated.Int96 { return deprecated.MaxInt96(page.values) } 782 783 func (page *int96Page) bounds() (min, 
max deprecated.Int96) { 784 return deprecated.MinMaxInt96(page.values) 785 } 786 787 func (page *int96Page) Bounds() (min, max Value, ok bool) { 788 if ok = len(page.values) > 0; ok { 789 minInt96, maxInt96 := page.bounds() 790 min = page.makeValue(minInt96) 791 max = page.makeValue(maxInt96) 792 } 793 return min, max, ok 794 } 795 796 func (page *int96Page) Slice(i, j int64) Page { 797 return &int96Page{ 798 typ: page.typ, 799 values: page.values[i:j], 800 columnIndex: page.columnIndex, 801 } 802 } 803 804 func (page *int96Page) makeValue(v deprecated.Int96) Value { 805 value := makeValueInt96(v) 806 value.columnIndex = page.columnIndex 807 return value 808 } 809 810 type floatPage struct { 811 typ Type 812 values []float32 813 columnIndex int16 814 } 815 816 func newFloatPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *floatPage { 817 return &floatPage{ 818 typ: typ, 819 values: values.Float()[:numValues], 820 columnIndex: ^columnIndex, 821 } 822 } 823 824 func (page *floatPage) Type() Type { return page.typ } 825 826 func (page *floatPage) Column() int { return int(^page.columnIndex) } 827 828 func (page *floatPage) Dictionary() Dictionary { return nil } 829 830 func (page *floatPage) NumRows() int64 { return int64(len(page.values)) } 831 832 func (page *floatPage) NumValues() int64 { return int64(len(page.values)) } 833 834 func (page *floatPage) NumNulls() int64 { return 0 } 835 836 func (page *floatPage) Size() int64 { return 4 * int64(len(page.values)) } 837 838 func (page *floatPage) RepetitionLevels() []byte { return nil } 839 840 func (page *floatPage) DefinitionLevels() []byte { return nil } 841 842 func (page *floatPage) Data() encoding.Values { return encoding.FloatValues(page.values) } 843 844 func (page *floatPage) Values() ValueReader { return &floatPageValues{page: page} } 845 846 func (page *floatPage) min() float32 { return minFloat32(page.values) } 847 848 func (page *floatPage) max() float32 { return 
maxFloat32(page.values) } 849 850 func (page *floatPage) bounds() (min, max float32) { return boundsFloat32(page.values) } 851 852 func (page *floatPage) Bounds() (min, max Value, ok bool) { 853 if ok = len(page.values) > 0; ok { 854 minFloat32, maxFloat32 := page.bounds() 855 min = page.makeValue(minFloat32) 856 max = page.makeValue(maxFloat32) 857 } 858 return min, max, ok 859 } 860 861 func (page *floatPage) Slice(i, j int64) Page { 862 return &floatPage{ 863 typ: page.typ, 864 values: page.values[i:j], 865 columnIndex: page.columnIndex, 866 } 867 } 868 869 func (page *floatPage) makeValue(v float32) Value { 870 value := makeValueFloat(v) 871 value.columnIndex = page.columnIndex 872 return value 873 } 874 875 type doublePage struct { 876 typ Type 877 values []float64 878 columnIndex int16 879 } 880 881 func newDoublePage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *doublePage { 882 return &doublePage{ 883 typ: typ, 884 values: values.Double()[:numValues], 885 columnIndex: ^columnIndex, 886 } 887 } 888 889 func (page *doublePage) Type() Type { return page.typ } 890 891 func (page *doublePage) Column() int { return int(^page.columnIndex) } 892 893 func (page *doublePage) Dictionary() Dictionary { return nil } 894 895 func (page *doublePage) NumRows() int64 { return int64(len(page.values)) } 896 897 func (page *doublePage) NumValues() int64 { return int64(len(page.values)) } 898 899 func (page *doublePage) NumNulls() int64 { return 0 } 900 901 func (page *doublePage) Size() int64 { return 8 * int64(len(page.values)) } 902 903 func (page *doublePage) RepetitionLevels() []byte { return nil } 904 905 func (page *doublePage) DefinitionLevels() []byte { return nil } 906 907 func (page *doublePage) Data() encoding.Values { return encoding.DoubleValues(page.values) } 908 909 func (page *doublePage) Values() ValueReader { return &doublePageValues{page: page} } 910 911 func (page *doublePage) min() float64 { return minFloat64(page.values) } 912 913 
func (page *doublePage) max() float64 { return maxFloat64(page.values) } 914 915 func (page *doublePage) bounds() (min, max float64) { return boundsFloat64(page.values) } 916 917 func (page *doublePage) Bounds() (min, max Value, ok bool) { 918 if ok = len(page.values) > 0; ok { 919 minFloat64, maxFloat64 := page.bounds() 920 min = page.makeValue(minFloat64) 921 max = page.makeValue(maxFloat64) 922 } 923 return min, max, ok 924 } 925 926 func (page *doublePage) Slice(i, j int64) Page { 927 return &doublePage{ 928 typ: page.typ, 929 values: page.values[i:j], 930 columnIndex: page.columnIndex, 931 } 932 } 933 934 func (page *doublePage) makeValue(v float64) Value { 935 value := makeValueDouble(v) 936 value.columnIndex = page.columnIndex 937 return value 938 } 939 940 type byteArrayPage struct { 941 typ Type 942 values []byte 943 offsets []uint32 944 columnIndex int16 945 } 946 947 func newByteArrayPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *byteArrayPage { 948 data, offsets := values.ByteArray() 949 return &byteArrayPage{ 950 typ: typ, 951 values: data, 952 offsets: offsets[:numValues+1], 953 columnIndex: ^columnIndex, 954 } 955 } 956 957 func (page *byteArrayPage) Type() Type { return page.typ } 958 959 func (page *byteArrayPage) Column() int { return int(^page.columnIndex) } 960 961 func (page *byteArrayPage) Dictionary() Dictionary { return nil } 962 963 func (page *byteArrayPage) NumRows() int64 { return int64(page.len()) } 964 965 func (page *byteArrayPage) NumValues() int64 { return int64(page.len()) } 966 967 func (page *byteArrayPage) NumNulls() int64 { return 0 } 968 969 func (page *byteArrayPage) Size() int64 { return int64(len(page.values)) + 4*int64(len(page.offsets)) } 970 971 func (page *byteArrayPage) RepetitionLevels() []byte { return nil } 972 973 func (page *byteArrayPage) DefinitionLevels() []byte { return nil } 974 975 func (page *byteArrayPage) Data() encoding.Values { 976 return 
encoding.ByteArrayValues(page.values, page.offsets) 977 } 978 979 func (page *byteArrayPage) Values() ValueReader { return &byteArrayPageValues{page: page} } 980 981 func (page *byteArrayPage) len() int { return len(page.offsets) - 1 } 982 983 func (page *byteArrayPage) index(i int) []byte { 984 j := page.offsets[i+0] 985 k := page.offsets[i+1] 986 return page.values[j:k:k] 987 } 988 989 func (page *byteArrayPage) min() (min []byte) { 990 if n := page.len(); n > 0 { 991 min = page.index(0) 992 993 for i := 1; i < n; i++ { 994 v := page.index(i) 995 996 if bytes.Compare(v, min) < 0 { 997 min = v 998 } 999 } 1000 } 1001 return min 1002 } 1003 1004 func (page *byteArrayPage) max() (max []byte) { 1005 if n := page.len(); n > 0 { 1006 max = page.index(0) 1007 1008 for i := 1; i < n; i++ { 1009 v := page.index(i) 1010 1011 if bytes.Compare(v, max) > 0 { 1012 max = v 1013 } 1014 } 1015 } 1016 return max 1017 } 1018 1019 func (page *byteArrayPage) bounds() (min, max []byte) { 1020 if n := page.len(); n > 0 { 1021 min = page.index(0) 1022 max = min 1023 1024 for i := 1; i < n; i++ { 1025 v := page.index(i) 1026 1027 switch { 1028 case bytes.Compare(v, min) < 0: 1029 min = v 1030 case bytes.Compare(v, max) > 0: 1031 max = v 1032 } 1033 } 1034 } 1035 return min, max 1036 } 1037 1038 func (page *byteArrayPage) Bounds() (min, max Value, ok bool) { 1039 if ok = len(page.offsets) > 1; ok { 1040 minBytes, maxBytes := page.bounds() 1041 min = page.makeValueBytes(minBytes) 1042 max = page.makeValueBytes(maxBytes) 1043 } 1044 return min, max, ok 1045 } 1046 1047 func (page *byteArrayPage) cloneValues() []byte { 1048 values := make([]byte, len(page.values)) 1049 copy(values, page.values) 1050 return values 1051 } 1052 1053 func (page *byteArrayPage) cloneOffsets() []uint32 { 1054 offsets := make([]uint32, len(page.offsets)) 1055 copy(offsets, page.offsets) 1056 return offsets 1057 } 1058 1059 func (page *byteArrayPage) Slice(i, j int64) Page { 1060 return &byteArrayPage{ 1061 typ: 
page.typ, 1062 values: page.values, 1063 offsets: page.offsets[i : j+1], 1064 columnIndex: page.columnIndex, 1065 } 1066 } 1067 1068 func (page *byteArrayPage) makeValueBytes(v []byte) Value { 1069 value := makeValueBytes(ByteArray, v) 1070 value.columnIndex = page.columnIndex 1071 return value 1072 } 1073 1074 func (page *byteArrayPage) makeValueString(v string) Value { 1075 value := makeValueString(ByteArray, v) 1076 value.columnIndex = page.columnIndex 1077 return value 1078 } 1079 1080 type fixedLenByteArrayPage struct { 1081 typ Type 1082 data []byte 1083 size int 1084 columnIndex int16 1085 } 1086 1087 func newFixedLenByteArrayPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *fixedLenByteArrayPage { 1088 data, size := values.FixedLenByteArray() 1089 return &fixedLenByteArrayPage{ 1090 typ: typ, 1091 data: data[:int(numValues)*size], 1092 size: size, 1093 columnIndex: ^columnIndex, 1094 } 1095 } 1096 1097 func (page *fixedLenByteArrayPage) Type() Type { return page.typ } 1098 1099 func (page *fixedLenByteArrayPage) Column() int { return int(^page.columnIndex) } 1100 1101 func (page *fixedLenByteArrayPage) Dictionary() Dictionary { return nil } 1102 1103 func (page *fixedLenByteArrayPage) NumRows() int64 { return int64(len(page.data) / page.size) } 1104 1105 func (page *fixedLenByteArrayPage) NumValues() int64 { return int64(len(page.data) / page.size) } 1106 1107 func (page *fixedLenByteArrayPage) NumNulls() int64 { return 0 } 1108 1109 func (page *fixedLenByteArrayPage) Size() int64 { return int64(len(page.data)) } 1110 1111 func (page *fixedLenByteArrayPage) RepetitionLevels() []byte { return nil } 1112 1113 func (page *fixedLenByteArrayPage) DefinitionLevels() []byte { return nil } 1114 1115 func (page *fixedLenByteArrayPage) Data() encoding.Values { 1116 return encoding.FixedLenByteArrayValues(page.data, page.size) 1117 } 1118 1119 func (page *fixedLenByteArrayPage) Values() ValueReader { 1120 return 
&fixedLenByteArrayPageValues{page: page} 1121 } 1122 1123 func (page *fixedLenByteArrayPage) min() []byte { return minFixedLenByteArray(page.data, page.size) } 1124 1125 func (page *fixedLenByteArrayPage) max() []byte { return maxFixedLenByteArray(page.data, page.size) } 1126 1127 func (page *fixedLenByteArrayPage) bounds() (min, max []byte) { 1128 return boundsFixedLenByteArray(page.data, page.size) 1129 } 1130 1131 func (page *fixedLenByteArrayPage) Bounds() (min, max Value, ok bool) { 1132 if ok = len(page.data) > 0; ok { 1133 minBytes, maxBytes := page.bounds() 1134 min = page.makeValueBytes(minBytes) 1135 max = page.makeValueBytes(maxBytes) 1136 } 1137 return min, max, ok 1138 } 1139 1140 func (page *fixedLenByteArrayPage) Slice(i, j int64) Page { 1141 return &fixedLenByteArrayPage{ 1142 typ: page.typ, 1143 data: page.data[i*int64(page.size) : j*int64(page.size)], 1144 size: page.size, 1145 columnIndex: page.columnIndex, 1146 } 1147 } 1148 1149 func (page *fixedLenByteArrayPage) makeValueBytes(v []byte) Value { 1150 value := makeValueBytes(FixedLenByteArray, v) 1151 value.columnIndex = page.columnIndex 1152 return value 1153 } 1154 1155 func (page *fixedLenByteArrayPage) makeValueString(v string) Value { 1156 value := makeValueString(FixedLenByteArray, v) 1157 value.columnIndex = page.columnIndex 1158 return value 1159 } 1160 1161 type uint32Page struct { 1162 typ Type 1163 values []uint32 1164 columnIndex int16 1165 } 1166 1167 func newUint32Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *uint32Page { 1168 return &uint32Page{ 1169 typ: typ, 1170 values: values.Uint32()[:numValues], 1171 columnIndex: ^columnIndex, 1172 } 1173 } 1174 1175 func (page *uint32Page) Type() Type { return page.typ } 1176 1177 func (page *uint32Page) Column() int { return int(^page.columnIndex) } 1178 1179 func (page *uint32Page) Dictionary() Dictionary { return nil } 1180 1181 func (page *uint32Page) NumRows() int64 { return int64(len(page.values)) } 1182 
1183 func (page *uint32Page) NumValues() int64 { return int64(len(page.values)) } 1184 1185 func (page *uint32Page) NumNulls() int64 { return 0 } 1186 1187 func (page *uint32Page) Size() int64 { return 4 * int64(len(page.values)) } 1188 1189 func (page *uint32Page) RepetitionLevels() []byte { return nil } 1190 1191 func (page *uint32Page) DefinitionLevels() []byte { return nil } 1192 1193 func (page *uint32Page) Data() encoding.Values { return encoding.Uint32Values(page.values) } 1194 1195 func (page *uint32Page) Values() ValueReader { return &uint32PageValues{page: page} } 1196 1197 func (page *uint32Page) min() uint32 { return minUint32(page.values) } 1198 1199 func (page *uint32Page) max() uint32 { return maxUint32(page.values) } 1200 1201 func (page *uint32Page) bounds() (min, max uint32) { return boundsUint32(page.values) } 1202 1203 func (page *uint32Page) Bounds() (min, max Value, ok bool) { 1204 if ok = len(page.values) > 0; ok { 1205 minUint32, maxUint32 := page.bounds() 1206 min = page.makeValue(minUint32) 1207 max = page.makeValue(maxUint32) 1208 } 1209 return min, max, ok 1210 } 1211 1212 func (page *uint32Page) Slice(i, j int64) Page { 1213 return &uint32Page{ 1214 typ: page.typ, 1215 values: page.values[i:j], 1216 columnIndex: page.columnIndex, 1217 } 1218 } 1219 1220 func (page *uint32Page) makeValue(v uint32) Value { 1221 value := makeValueUint32(v) 1222 value.columnIndex = page.columnIndex 1223 return value 1224 } 1225 1226 type uint64Page struct { 1227 typ Type 1228 values []uint64 1229 columnIndex int16 1230 } 1231 1232 func newUint64Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *uint64Page { 1233 return &uint64Page{ 1234 typ: typ, 1235 values: values.Uint64()[:numValues], 1236 columnIndex: ^columnIndex, 1237 } 1238 } 1239 1240 func (page *uint64Page) Type() Type { return page.typ } 1241 1242 func (page *uint64Page) Column() int { return int(^page.columnIndex) } 1243 1244 func (page *uint64Page) Dictionary() 
Dictionary { return nil } 1245 1246 func (page *uint64Page) NumRows() int64 { return int64(len(page.values)) } 1247 1248 func (page *uint64Page) NumValues() int64 { return int64(len(page.values)) } 1249 1250 func (page *uint64Page) NumNulls() int64 { return 0 } 1251 1252 func (page *uint64Page) Size() int64 { return 8 * int64(len(page.values)) } 1253 1254 func (page *uint64Page) RepetitionLevels() []byte { return nil } 1255 1256 func (page *uint64Page) DefinitionLevels() []byte { return nil } 1257 1258 func (page *uint64Page) Data() encoding.Values { return encoding.Uint64Values(page.values) } 1259 1260 func (page *uint64Page) Values() ValueReader { return &uint64PageValues{page: page} } 1261 1262 func (page *uint64Page) min() uint64 { return minUint64(page.values) } 1263 1264 func (page *uint64Page) max() uint64 { return maxUint64(page.values) } 1265 1266 func (page *uint64Page) bounds() (min, max uint64) { return boundsUint64(page.values) } 1267 1268 func (page *uint64Page) Bounds() (min, max Value, ok bool) { 1269 if ok = len(page.values) > 0; ok { 1270 minUint64, maxUint64 := page.bounds() 1271 min = page.makeValue(minUint64) 1272 max = page.makeValue(maxUint64) 1273 } 1274 return min, max, ok 1275 } 1276 1277 func (page *uint64Page) Slice(i, j int64) Page { 1278 return &uint64Page{ 1279 typ: page.typ, 1280 values: page.values[i:j], 1281 columnIndex: page.columnIndex, 1282 } 1283 } 1284 1285 func (page *uint64Page) makeValue(v uint64) Value { 1286 value := makeValueUint64(v) 1287 value.columnIndex = page.columnIndex 1288 return value 1289 } 1290 1291 type be128Page struct { 1292 typ Type 1293 values [][16]byte 1294 columnIndex int16 1295 } 1296 1297 func newBE128Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *be128Page { 1298 return &be128Page{ 1299 typ: typ, 1300 values: values.Uint128()[:numValues], 1301 columnIndex: ^columnIndex, 1302 } 1303 } 1304 1305 func (page *be128Page) Type() Type { return page.typ } 1306 1307 func (page 
*be128Page) Column() int { return int(^page.columnIndex) } 1308 1309 func (page *be128Page) Dictionary() Dictionary { return nil } 1310 1311 func (page *be128Page) NumRows() int64 { return int64(len(page.values)) } 1312 1313 func (page *be128Page) NumValues() int64 { return int64(len(page.values)) } 1314 1315 func (page *be128Page) NumNulls() int64 { return 0 } 1316 1317 func (page *be128Page) Size() int64 { return 16 * int64(len(page.values)) } 1318 1319 func (page *be128Page) RepetitionLevels() []byte { return nil } 1320 1321 func (page *be128Page) DefinitionLevels() []byte { return nil } 1322 1323 func (page *be128Page) Data() encoding.Values { return encoding.Uint128Values(page.values) } 1324 1325 func (page *be128Page) Values() ValueReader { return &be128PageValues{page: page} } 1326 1327 func (page *be128Page) min() []byte { return minBE128(page.values) } 1328 1329 func (page *be128Page) max() []byte { return maxBE128(page.values) } 1330 1331 func (page *be128Page) bounds() (min, max []byte) { return boundsBE128(page.values) } 1332 1333 func (page *be128Page) Bounds() (min, max Value, ok bool) { 1334 if ok = len(page.values) > 0; ok { 1335 minBytes, maxBytes := page.bounds() 1336 min = page.makeValueBytes(minBytes) 1337 max = page.makeValueBytes(maxBytes) 1338 } 1339 return min, max, ok 1340 } 1341 1342 func (page *be128Page) Slice(i, j int64) Page { 1343 return &be128Page{ 1344 typ: page.typ, 1345 values: page.values[i:j], 1346 columnIndex: page.columnIndex, 1347 } 1348 } 1349 1350 func (page *be128Page) makeValue(v *[16]byte) Value { 1351 return page.makeValueBytes(v[:]) 1352 } 1353 1354 func (page *be128Page) makeValueBytes(v []byte) Value { 1355 value := makeValueBytes(FixedLenByteArray, v) 1356 value.columnIndex = page.columnIndex 1357 return value 1358 } 1359 1360 func (page *be128Page) makeValueString(v string) Value { 1361 value := makeValueString(FixedLenByteArray, v) 1362 value.columnIndex = page.columnIndex 1363 return value 1364 } 1365 1366 type 
nullPage struct { 1367 typ Type 1368 column int 1369 count int 1370 } 1371 1372 func newNullPage(typ Type, columnIndex int16, numValues int32) *nullPage { 1373 return &nullPage{ 1374 typ: typ, 1375 column: int(columnIndex), 1376 count: int(numValues), 1377 } 1378 } 1379 1380 func (page *nullPage) Type() Type { return page.typ } 1381 func (page *nullPage) Column() int { return page.column } 1382 func (page *nullPage) Dictionary() Dictionary { return nil } 1383 func (page *nullPage) NumRows() int64 { return int64(page.count) } 1384 func (page *nullPage) NumValues() int64 { return int64(page.count) } 1385 func (page *nullPage) NumNulls() int64 { return int64(page.count) } 1386 func (page *nullPage) Bounds() (min, max Value, ok bool) { return } 1387 func (page *nullPage) Size() int64 { return 1 } 1388 func (page *nullPage) Values() ValueReader { 1389 return &nullPageValues{column: page.column, remain: page.count} 1390 } 1391 func (page *nullPage) Slice(i, j int64) Page { 1392 return &nullPage{column: page.column, count: page.count - int(j-i)} 1393 } 1394 func (page *nullPage) RepetitionLevels() []byte { return nil } 1395 func (page *nullPage) DefinitionLevels() []byte { return nil } 1396 func (page *nullPage) Data() encoding.Values { return encoding.Values{} }