// Source: github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/page.go

package parquet

import (
	"bytes"
	"fmt"
	"io"

	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/encoding"
	"github.com/segmentio/parquet-go/internal/bitpack"
	"github.com/segmentio/parquet-go/internal/debug"
)

// Page values represent sequences of parquet values. From the Parquet
// documentation: "Column chunks are a chunk of the data for a particular
// column. They live in a particular row group and are guaranteed to be
// contiguous in the file. Column chunks are divided up into pages. A page is
// conceptually an indivisible unit (in terms of compression and encoding).
// There can be multiple page types which are interleaved in a column chunk."
//
// https://github.com/apache/parquet-format#glossary
type Page interface {
	// Returns the type of values read from this page.
	//
	// The returned type can be used to encode the page data. In the case of
	// an indexed page (which has a dictionary), the type is configured to
	// encode the indexes stored in the page rather than the plain values.
	Type() Type

	// Returns the column index that this page belongs to.
	Column() int

	// If the page contains indexed values, calling this method returns the
	// dictionary in which the values are looked up. Otherwise, the method
	// returns nil.
	Dictionary() Dictionary

	// Returns the number of rows, values, and nulls in the page. The number of
	// rows may be less than the number of values in the page if the page is
	// part of a repeated column.
	NumRows() int64
	NumValues() int64
	NumNulls() int64

	// Returns the page's min and max values.
	//
	// The third value is a boolean indicating whether the page bounds were
	// available. Page bounds may not be known if the page contained no values
	// or only nulls, or if they were read from a parquet file which had neither
	// page statistics nor a page index.
	Bounds() (min, max Value, ok bool)

	// Returns the size of the page in bytes (uncompressed).
	Size() int64

	// Returns a reader exposing the values contained in the page.
	//
	// Depending on the underlying implementation, the returned reader may
	// support reading an array of typed Go values by implementing interfaces
	// like parquet.Int32Reader. Applications should use type assertions on
	// the returned reader to determine whether those optimizations are
	// available.
	Values() ValueReader

	// Returns a new page which is a slice of the receiver between row indexes
	// i (inclusive) and j (exclusive).
	Slice(i, j int64) Page

	// Expose the lists of repetition and definition levels of the page.
	//
	// The returned slices may be empty when the page has no repetition or
	// definition levels.
	RepetitionLevels() []byte
	DefinitionLevels() []byte

	// Returns the in-memory buffer holding the page values.
	//
	// The intent is for the returned value to be used as input parameter when
	// calling the Encode method of the associated Type.
	//
	// The slices referenced by the encoding.Values may be the same across
	// multiple calls to this method; applications must treat the content as
	// immutable.
	Data() encoding.Values
}

// PageReader is an interface implemented by types that support producing a
// sequence of pages.
type PageReader interface {
	// Reads and returns the next page from the sequence. When all pages have
	// been read, or if the sequence was closed, the method returns io.EOF.
	ReadPage() (Page, error)
}

// PageWriter is an interface implemented by types that support writing pages
// to an underlying storage medium.
type PageWriter interface {
	// WritePage writes the given page. CopyPages sums the returned count to
	// report the number of values it copied.
	WritePage(Page) (int64, error)
}

// Pages is an interface implemented by page readers returned by calling the
// Pages method of ColumnChunk instances.
type Pages interface {
	PageReader
	RowSeeker
	io.Closer
}

// AsyncPages wraps the given Pages instance to perform page reads
// asynchronously in a separate goroutine.
//
// Performing page reads asynchronously is important when the application may
// be reading pages from a high latency backend, and the last
// page read may be processed while initiating reading of the next page.
func AsyncPages(pages Pages) Pages {
	p := new(asyncPages)
	// Passing a nil done channel makes init create one that p owns, so that
	// p.Close is able to stop the goroutine.
	p.init(pages, nil)
	// If the pages object gets garbage collected without Close being called,
	// this finalizer would ensure that the goroutine is stopped and doesn't
	// leak.
	debug.SetFinalizer(p, func(p *asyncPages) { p.Close() })
	return p
}

// asyncPages is the Pages implementation returned by AsyncPages. It holds the
// application-side ends of the channels used to talk to the readPages
// goroutine started by init.
type asyncPages struct {
	// Pages and errors produced by the goroutine; closed when it exits.
	read <-chan asyncPage
	// Row indexes requested through SeekToRow; set to nil by Close.
	seek chan<- int64
	// Closed by Close to stop the goroutine. Left nil when init was handed an
	// existing done channel.
	done chan<- struct{}
	// Number of SeekToRow calls made so far; ReadPage only returns pages
	// tagged with this version.
	version int64
}

// asyncPage is the message sent by readPages on the read channel: the result
// of a read (or seek) tagged with the seek version it was produced under.
type asyncPage struct {
	page    Page
	err     error
	version int64
}

// init creates the read and seek channels and starts the readPages goroutine
// on base.
//
// When done is nil a new channel is created and stored in pages.done so that
// Close can close it. When done is non-nil it is passed to the goroutine as is
// and pages.done is left untouched, so Close will not close it.
func (pages *asyncPages) init(base Pages, done chan struct{}) {
	read := make(chan asyncPage)
	seek := make(chan int64, 1)

	pages.read = read
	pages.seek = seek

	if done == nil {
		done = make(chan struct{})
		pages.done = done
	}

	go readPages(base, read, seek, done)
}

// Close signals the goroutine to stop (when this instance owns the done
// channel), then drains the read channel until the goroutine closes it. The
// returned error is the one carried by the last message received.
//
// After Close returns, SeekToRow reports io.ErrClosedPipe because the seek
// channel has been cleared.
func (pages *asyncPages) Close() (err error) {
	if pages.done != nil {
		close(pages.done)
		// Clearing the field prevents a second Close from closing the channel
		// again.
		pages.done = nil
	}
	for p := range pages.read {
		// Capture the last error, which is the value returned from closing the
		// underlying Pages instance.
		err = p.err
	}
	pages.seek = nil
	return err
}

// ReadPage returns the next page produced by the goroutine for the current
// seek version, or io.EOF once the read channel has been closed.
func (pages *asyncPages) ReadPage() (Page, error) {
	for {
		p, ok := <-pages.read
		if !ok {
			return nil, io.EOF
		}
		// Because calls to SeekToRow might be made concurrently to reading
		// pages, it is possible for ReadPage to see pages that were read before
		// the last SeekToRow call.
		//
		// A version number is attached to each page read asynchronously to
		// discard outdated pages and ensure that we maintain a consistent view
		// of the sequence of pages read.
		if p.version == pages.version {
			return p.page, p.err
		}
	}
}

// SeekToRow forwards rowIndex to the goroutine and bumps the local version so
// that ReadPage discards any page produced before the seek was handled.
//
// It returns io.ErrClosedPipe when called after Close.
func (pages *asyncPages) SeekToRow(rowIndex int64) error {
	if pages.seek == nil {
		return io.ErrClosedPipe
	}
	// The seek channel has a capacity of 1 to allow the first SeekToRow call to
	// be non-blocking.
	//
	// If SeekToRow calls are performed faster than they can be handled by the
	// goroutine reading pages, this path might become a contention point.
	pages.seek <- rowIndex
	pages.version++
	return nil
}

// readPages is the body of the goroutine started by asyncPages.init. It reads
// pages from the underlying Pages and publishes them on read, applies the row
// indexes received on seek, and exits when done is closed.
//
// On exit it closes the underlying Pages, sends the result of that Close on
// read tagged with version -1, and then closes read.
func readPages(pages Pages, read chan<- asyncPage, seek <-chan int64, done <-chan struct{}) {
	defer func() {
		// read is unbuffered: this send blocks until a receiver (ReadPage or
		// the drain loop in Close) takes the message.
		read <- asyncPage{err: pages.Close(), version: -1}
		close(read)
	}()

	version := int64(0)
	for {
		page, err := pages.ReadPage()

		for {
			select {
			case <-done:
				return
			case read <- asyncPage{
				page:    page,
				err:     err,
				version: version,
			}:
			case rowIndex := <-seek:
				// A seek invalidates whatever was read before it: bump the
				// version and replace err with the outcome of the seek.
				version++
				err = pages.SeekToRow(rowIndex)
			}
			// With a nil error, move on to reading the next page. Otherwise
			// stay in this loop, which keeps offering the same error to readers
			// until a seek succeeds or done is closed.
			if err == nil {
				break
			}
		}
	}
}

// singlePage is a Pages implementation exposing exactly one page.
type singlePage struct {
	page    Page
	seek    int64 // row index the next ReadPage starts from
	numRows int64 // number of rows in page, captured by onePage
}

// ReadPage returns the page (sliced from the current seek position when that
// position is greater than zero) and marks it as consumed. It returns io.EOF
// when the page was already consumed, when the seek position is at or past
// the last row, or after Close.
func (r *singlePage) ReadPage() (Page, error) {
	if r.page != nil {
		if r.seek < r.numRows {
			seek := r.seek
			// Moving seek to numRows makes the next call return io.EOF.
			r.seek = r.numRows
			if seek > 0 {
				return r.page.Slice(seek, r.numRows), nil
			}
			return r.page, nil
		}
	}
	return nil, io.EOF
}

// SeekToRow records rowIndex as the position of the next read. The value is
// not validated here.
func (r *singlePage) SeekToRow(rowIndex int64) error {
	r.seek = rowIndex
	return nil
}

// Close drops the reference to the page, after which ReadPage returns io.EOF.
func (r *singlePage) Close() error {
	r.page = nil
	r.seek = 0
	return nil
}

// onePage returns a Pages which yields the given page only.
func onePage(page Page) Pages {
	return &singlePage{page: page, numRows: page.NumRows()}
}

// CopyPages copies pages from src to dst, returning the number of values that
// were copied.
//
// The function returns any error it encounters reading or writing pages, except
// for io.EOF from the reader which indicates that there were no more pages to
// read.
271 func CopyPages(dst PageWriter, src PageReader) (numValues int64, err error) { 272 for { 273 p, err := src.ReadPage() 274 if err != nil { 275 if err == io.EOF { 276 err = nil 277 } 278 return numValues, err 279 } 280 n, err := dst.WritePage(p) 281 numValues += n 282 if err != nil { 283 return numValues, err 284 } 285 } 286 } 287 288 // errorPage is an implementation of the Page interface which always errors when 289 // attempting to read its values. 290 // 291 // The error page declares that it contains one value (even if it does not) 292 // as a way to ensure that it is not ignored due to being empty when written 293 // to a file. 294 type errorPage struct { 295 typ Type 296 err error 297 columnIndex int 298 } 299 300 func newErrorPage(typ Type, columnIndex int, msg string, args ...interface{}) *errorPage { 301 return &errorPage{ 302 typ: typ, 303 err: fmt.Errorf(msg, args...), 304 columnIndex: columnIndex, 305 } 306 } 307 308 func (page *errorPage) Type() Type { return page.typ } 309 func (page *errorPage) Column() int { return page.columnIndex } 310 func (page *errorPage) Dictionary() Dictionary { return nil } 311 func (page *errorPage) NumRows() int64 { return 1 } 312 func (page *errorPage) NumValues() int64 { return 1 } 313 func (page *errorPage) NumNulls() int64 { return 0 } 314 func (page *errorPage) Bounds() (min, max Value, ok bool) { return } 315 func (page *errorPage) Slice(i, j int64) Page { return page } 316 func (page *errorPage) Size() int64 { return 1 } 317 func (page *errorPage) RepetitionLevels() []byte { return nil } 318 func (page *errorPage) DefinitionLevels() []byte { return nil } 319 func (page *errorPage) Data() encoding.Values { return encoding.Values{} } 320 func (page *errorPage) Values() ValueReader { return errorPageValues{page: page} } 321 322 type errorPageValues struct{ page *errorPage } 323 324 func (r errorPageValues) ReadValues([]Value) (int, error) { return 0, r.page.err } 325 func (r errorPageValues) Close() error { return 
nil } 326 327 func errPageBoundsOutOfRange(i, j, n int64) error { 328 return fmt.Errorf("page bounds out of range [%d:%d]: with length %d", i, j, n) 329 } 330 331 type optionalPage struct { 332 base Page 333 maxDefinitionLevel byte 334 definitionLevels []byte 335 } 336 337 func newOptionalPage(base Page, maxDefinitionLevel byte, definitionLevels []byte) *optionalPage { 338 return &optionalPage{ 339 base: base, 340 maxDefinitionLevel: maxDefinitionLevel, 341 definitionLevels: definitionLevels, 342 } 343 } 344 345 func (page *optionalPage) Type() Type { return page.base.Type() } 346 347 func (page *optionalPage) Column() int { return page.base.Column() } 348 349 func (page *optionalPage) Dictionary() Dictionary { return page.base.Dictionary() } 350 351 func (page *optionalPage) NumRows() int64 { return int64(len(page.definitionLevels)) } 352 353 func (page *optionalPage) NumValues() int64 { return int64(len(page.definitionLevels)) } 354 355 func (page *optionalPage) NumNulls() int64 { 356 return int64(countLevelsNotEqual(page.definitionLevels, page.maxDefinitionLevel)) 357 } 358 359 func (page *optionalPage) Bounds() (min, max Value, ok bool) { return page.base.Bounds() } 360 361 func (page *optionalPage) Size() int64 { return int64(len(page.definitionLevels)) + page.base.Size() } 362 363 func (page *optionalPage) RepetitionLevels() []byte { return nil } 364 365 func (page *optionalPage) DefinitionLevels() []byte { return page.definitionLevels } 366 367 func (page *optionalPage) Data() encoding.Values { return page.base.Data() } 368 369 func (page *optionalPage) Values() ValueReader { 370 return &optionalPageValues{ 371 page: page, 372 values: page.base.Values(), 373 } 374 } 375 376 func (page *optionalPage) Slice(i, j int64) Page { 377 maxDefinitionLevel := page.maxDefinitionLevel 378 definitionLevels := page.definitionLevels 379 numNulls1 := int64(countLevelsNotEqual(definitionLevels[:i], maxDefinitionLevel)) 380 numNulls2 := 
int64(countLevelsNotEqual(definitionLevels[i:j], maxDefinitionLevel)) 381 return newOptionalPage( 382 page.base.Slice(i-numNulls1, j-(numNulls1+numNulls2)), 383 maxDefinitionLevel, 384 definitionLevels[i:j:j], 385 ) 386 } 387 388 type repeatedPage struct { 389 base Page 390 maxRepetitionLevel byte 391 maxDefinitionLevel byte 392 definitionLevels []byte 393 repetitionLevels []byte 394 } 395 396 func newRepeatedPage(base Page, maxRepetitionLevel, maxDefinitionLevel byte, repetitionLevels, definitionLevels []byte) *repeatedPage { 397 return &repeatedPage{ 398 base: base, 399 maxRepetitionLevel: maxRepetitionLevel, 400 maxDefinitionLevel: maxDefinitionLevel, 401 definitionLevels: definitionLevels, 402 repetitionLevels: repetitionLevels, 403 } 404 } 405 406 func (page *repeatedPage) Type() Type { return page.base.Type() } 407 408 func (page *repeatedPage) Column() int { return page.base.Column() } 409 410 func (page *repeatedPage) Dictionary() Dictionary { return page.base.Dictionary() } 411 412 func (page *repeatedPage) NumRows() int64 { return int64(countLevelsEqual(page.repetitionLevels, 0)) } 413 414 func (page *repeatedPage) NumValues() int64 { return int64(len(page.definitionLevels)) } 415 416 func (page *repeatedPage) NumNulls() int64 { 417 return int64(countLevelsNotEqual(page.definitionLevels, page.maxDefinitionLevel)) 418 } 419 420 func (page *repeatedPage) Bounds() (min, max Value, ok bool) { return page.base.Bounds() } 421 422 func (page *repeatedPage) Size() int64 { 423 return int64(len(page.repetitionLevels)) + int64(len(page.definitionLevels)) + page.base.Size() 424 } 425 426 func (page *repeatedPage) RepetitionLevels() []byte { return page.repetitionLevels } 427 428 func (page *repeatedPage) DefinitionLevels() []byte { return page.definitionLevels } 429 430 func (page *repeatedPage) Data() encoding.Values { return page.base.Data() } 431 432 func (page *repeatedPage) Values() ValueReader { 433 return &repeatedPageValues{ 434 page: page, 435 values: 
page.base.Values(), 436 } 437 } 438 439 func (page *repeatedPage) Slice(i, j int64) Page { 440 numRows := page.NumRows() 441 if i < 0 || i > numRows { 442 panic(errPageBoundsOutOfRange(i, j, numRows)) 443 } 444 if j < 0 || j > numRows { 445 panic(errPageBoundsOutOfRange(i, j, numRows)) 446 } 447 if i > j { 448 panic(errPageBoundsOutOfRange(i, j, numRows)) 449 } 450 451 maxRepetitionLevel := page.maxRepetitionLevel 452 maxDefinitionLevel := page.maxDefinitionLevel 453 repetitionLevels := page.repetitionLevels 454 definitionLevels := page.definitionLevels 455 456 rowIndex0 := 0 457 rowIndex1 := len(repetitionLevels) 458 rowIndex2 := len(repetitionLevels) 459 460 for k, def := range repetitionLevels { 461 if def == 0 { 462 if rowIndex0 == int(i) { 463 rowIndex1 = k 464 break 465 } 466 rowIndex0++ 467 } 468 } 469 470 for k, def := range repetitionLevels[rowIndex1:] { 471 if def == 0 { 472 if rowIndex0 == int(j) { 473 rowIndex2 = rowIndex1 + k 474 break 475 } 476 rowIndex0++ 477 } 478 } 479 480 numNulls1 := countLevelsNotEqual(definitionLevels[:rowIndex1], maxDefinitionLevel) 481 numNulls2 := countLevelsNotEqual(definitionLevels[rowIndex1:rowIndex2], maxDefinitionLevel) 482 483 i = int64(rowIndex1 - numNulls1) 484 j = int64(rowIndex2 - (numNulls1 + numNulls2)) 485 486 return newRepeatedPage( 487 page.base.Slice(i, j), 488 maxRepetitionLevel, 489 maxDefinitionLevel, 490 repetitionLevels[rowIndex1:rowIndex2:rowIndex2], 491 definitionLevels[rowIndex1:rowIndex2:rowIndex2], 492 ) 493 } 494 495 type booleanPage struct { 496 typ Type 497 bits []byte 498 offset int32 499 numValues int32 500 columnIndex int16 501 } 502 503 func newBooleanPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *booleanPage { 504 return &booleanPage{ 505 typ: typ, 506 bits: values.Boolean()[:bitpack.ByteCount(uint(numValues))], 507 numValues: numValues, 508 columnIndex: ^columnIndex, 509 } 510 } 511 512 func (page *booleanPage) Type() Type { return page.typ } 513 514 func (page 
*booleanPage) Column() int { return int(^page.columnIndex) } 515 516 func (page *booleanPage) Dictionary() Dictionary { return nil } 517 518 func (page *booleanPage) NumRows() int64 { return int64(page.numValues) } 519 520 func (page *booleanPage) NumValues() int64 { return int64(page.numValues) } 521 522 func (page *booleanPage) NumNulls() int64 { return 0 } 523 524 func (page *booleanPage) Size() int64 { return int64(len(page.bits)) } 525 526 func (page *booleanPage) RepetitionLevels() []byte { return nil } 527 528 func (page *booleanPage) DefinitionLevels() []byte { return nil } 529 530 func (page *booleanPage) Data() encoding.Values { return encoding.BooleanValues(page.bits) } 531 532 func (page *booleanPage) Values() ValueReader { return &booleanPageValues{page: page} } 533 534 func (page *booleanPage) valueAt(i int) bool { 535 j := uint32(int(page.offset)+i) / 8 536 k := uint32(int(page.offset)+i) % 8 537 return ((page.bits[j] >> k) & 1) != 0 538 } 539 540 func (page *booleanPage) min() bool { 541 for i := 0; i < int(page.numValues); i++ { 542 if !page.valueAt(i) { 543 return false 544 } 545 } 546 return page.numValues > 0 547 } 548 549 func (page *booleanPage) max() bool { 550 for i := 0; i < int(page.numValues); i++ { 551 if page.valueAt(i) { 552 return true 553 } 554 } 555 return false 556 } 557 558 func (page *booleanPage) bounds() (min, max bool) { 559 hasFalse, hasTrue := false, false 560 561 for i := 0; i < int(page.numValues); i++ { 562 v := page.valueAt(i) 563 if v { 564 hasTrue = true 565 } else { 566 hasFalse = true 567 } 568 if hasTrue && hasFalse { 569 break 570 } 571 } 572 573 min = !hasFalse 574 max = hasTrue 575 return min, max 576 } 577 578 func (page *booleanPage) Bounds() (min, max Value, ok bool) { 579 if ok = page.numValues > 0; ok { 580 minBool, maxBool := page.bounds() 581 min = page.makeValue(minBool) 582 max = page.makeValue(maxBool) 583 } 584 return min, max, ok 585 } 586 587 func (page *booleanPage) Slice(i, j int64) Page { 588 off 
:= i / 8 589 end := j / 8 590 591 if (j % 8) != 0 { 592 end++ 593 } 594 595 return &booleanPage{ 596 typ: page.typ, 597 bits: page.bits[off:end], 598 offset: int32(i % 8), 599 numValues: int32(j - i), 600 columnIndex: page.columnIndex, 601 } 602 } 603 604 func (page *booleanPage) makeValue(v bool) Value { 605 value := makeValueBoolean(v) 606 value.columnIndex = page.columnIndex 607 return value 608 } 609 610 type int32Page struct { 611 typ Type 612 values []int32 613 columnIndex int16 614 } 615 616 func newInt32Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *int32Page { 617 return &int32Page{ 618 typ: typ, 619 values: values.Int32()[:numValues], 620 columnIndex: ^columnIndex, 621 } 622 } 623 624 func (page *int32Page) Type() Type { return page.typ } 625 626 func (page *int32Page) Column() int { return int(^page.columnIndex) } 627 628 func (page *int32Page) Dictionary() Dictionary { return nil } 629 630 func (page *int32Page) NumRows() int64 { return int64(len(page.values)) } 631 632 func (page *int32Page) NumValues() int64 { return int64(len(page.values)) } 633 634 func (page *int32Page) NumNulls() int64 { return 0 } 635 636 func (page *int32Page) Size() int64 { return 4 * int64(len(page.values)) } 637 638 func (page *int32Page) RepetitionLevels() []byte { return nil } 639 640 func (page *int32Page) DefinitionLevels() []byte { return nil } 641 642 func (page *int32Page) Data() encoding.Values { return encoding.Int32Values(page.values) } 643 644 func (page *int32Page) Values() ValueReader { return &int32PageValues{page: page} } 645 646 func (page *int32Page) min() int32 { return minInt32(page.values) } 647 648 func (page *int32Page) max() int32 { return maxInt32(page.values) } 649 650 func (page *int32Page) bounds() (min, max int32) { return boundsInt32(page.values) } 651 652 func (page *int32Page) Bounds() (min, max Value, ok bool) { 653 if ok = len(page.values) > 0; ok { 654 minInt32, maxInt32 := page.bounds() 655 min = 
page.makeValue(minInt32) 656 max = page.makeValue(maxInt32) 657 } 658 return min, max, ok 659 } 660 661 func (page *int32Page) Slice(i, j int64) Page { 662 return &int32Page{ 663 typ: page.typ, 664 values: page.values[i:j], 665 columnIndex: page.columnIndex, 666 } 667 } 668 669 func (page *int32Page) makeValue(v int32) Value { 670 value := makeValueInt32(v) 671 value.columnIndex = page.columnIndex 672 return value 673 } 674 675 type int64Page struct { 676 typ Type 677 values []int64 678 columnIndex int16 679 } 680 681 func newInt64Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *int64Page { 682 return &int64Page{ 683 typ: typ, 684 values: values.Int64()[:numValues], 685 columnIndex: ^columnIndex, 686 } 687 } 688 689 func (page *int64Page) Type() Type { return page.typ } 690 691 func (page *int64Page) Column() int { return int(^page.columnIndex) } 692 693 func (page *int64Page) Dictionary() Dictionary { return nil } 694 695 func (page *int64Page) NumRows() int64 { return int64(len(page.values)) } 696 697 func (page *int64Page) NumValues() int64 { return int64(len(page.values)) } 698 699 func (page *int64Page) NumNulls() int64 { return 0 } 700 701 func (page *int64Page) Size() int64 { return 8 * int64(len(page.values)) } 702 703 func (page *int64Page) RepetitionLevels() []byte { return nil } 704 705 func (page *int64Page) DefinitionLevels() []byte { return nil } 706 707 func (page *int64Page) Data() encoding.Values { return encoding.Int64Values(page.values) } 708 709 func (page *int64Page) Values() ValueReader { return &int64PageValues{page: page} } 710 711 func (page *int64Page) min() int64 { return minInt64(page.values) } 712 713 func (page *int64Page) max() int64 { return maxInt64(page.values) } 714 715 func (page *int64Page) bounds() (min, max int64) { return boundsInt64(page.values) } 716 717 func (page *int64Page) Bounds() (min, max Value, ok bool) { 718 if ok = len(page.values) > 0; ok { 719 minInt64, maxInt64 := page.bounds() 720 
min = page.makeValue(minInt64) 721 max = page.makeValue(maxInt64) 722 } 723 return min, max, ok 724 } 725 726 func (page *int64Page) Slice(i, j int64) Page { 727 return &int64Page{ 728 typ: page.typ, 729 values: page.values[i:j], 730 columnIndex: page.columnIndex, 731 } 732 } 733 734 func (page *int64Page) makeValue(v int64) Value { 735 value := makeValueInt64(v) 736 value.columnIndex = page.columnIndex 737 return value 738 } 739 740 type int96Page struct { 741 typ Type 742 values []deprecated.Int96 743 columnIndex int16 744 } 745 746 func newInt96Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *int96Page { 747 return &int96Page{ 748 typ: typ, 749 values: values.Int96()[:numValues], 750 columnIndex: ^columnIndex, 751 } 752 } 753 754 func (page *int96Page) Type() Type { return page.typ } 755 756 func (page *int96Page) Column() int { return int(^page.columnIndex) } 757 758 func (page *int96Page) Dictionary() Dictionary { return nil } 759 760 func (page *int96Page) NumRows() int64 { return int64(len(page.values)) } 761 762 func (page *int96Page) NumValues() int64 { return int64(len(page.values)) } 763 764 func (page *int96Page) NumNulls() int64 { return 0 } 765 766 func (page *int96Page) Size() int64 { return 12 * int64(len(page.values)) } 767 768 func (page *int96Page) RepetitionLevels() []byte { return nil } 769 770 func (page *int96Page) DefinitionLevels() []byte { return nil } 771 772 func (page *int96Page) Data() encoding.Values { return encoding.Int96Values(page.values) } 773 774 func (page *int96Page) Values() ValueReader { return &int96PageValues{page: page} } 775 776 func (page *int96Page) min() deprecated.Int96 { return deprecated.MinInt96(page.values) } 777 778 func (page *int96Page) max() deprecated.Int96 { return deprecated.MaxInt96(page.values) } 779 780 func (page *int96Page) bounds() (min, max deprecated.Int96) { 781 return deprecated.MinMaxInt96(page.values) 782 } 783 784 func (page *int96Page) Bounds() (min, max Value, ok 
bool) { 785 if ok = len(page.values) > 0; ok { 786 minInt96, maxInt96 := page.bounds() 787 min = page.makeValue(minInt96) 788 max = page.makeValue(maxInt96) 789 } 790 return min, max, ok 791 } 792 793 func (page *int96Page) Slice(i, j int64) Page { 794 return &int96Page{ 795 typ: page.typ, 796 values: page.values[i:j], 797 columnIndex: page.columnIndex, 798 } 799 } 800 801 func (page *int96Page) makeValue(v deprecated.Int96) Value { 802 value := makeValueInt96(v) 803 value.columnIndex = page.columnIndex 804 return value 805 } 806 807 type floatPage struct { 808 typ Type 809 values []float32 810 columnIndex int16 811 } 812 813 func newFloatPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *floatPage { 814 return &floatPage{ 815 typ: typ, 816 values: values.Float()[:numValues], 817 columnIndex: ^columnIndex, 818 } 819 } 820 821 func (page *floatPage) Type() Type { return page.typ } 822 823 func (page *floatPage) Column() int { return int(^page.columnIndex) } 824 825 func (page *floatPage) Dictionary() Dictionary { return nil } 826 827 func (page *floatPage) NumRows() int64 { return int64(len(page.values)) } 828 829 func (page *floatPage) NumValues() int64 { return int64(len(page.values)) } 830 831 func (page *floatPage) NumNulls() int64 { return 0 } 832 833 func (page *floatPage) Size() int64 { return 4 * int64(len(page.values)) } 834 835 func (page *floatPage) RepetitionLevels() []byte { return nil } 836 837 func (page *floatPage) DefinitionLevels() []byte { return nil } 838 839 func (page *floatPage) Data() encoding.Values { return encoding.FloatValues(page.values) } 840 841 func (page *floatPage) Values() ValueReader { return &floatPageValues{page: page} } 842 843 func (page *floatPage) min() float32 { return minFloat32(page.values) } 844 845 func (page *floatPage) max() float32 { return maxFloat32(page.values) } 846 847 func (page *floatPage) bounds() (min, max float32) { return boundsFloat32(page.values) } 848 849 func (page *floatPage) 
Bounds() (min, max Value, ok bool) { 850 if ok = len(page.values) > 0; ok { 851 minFloat32, maxFloat32 := page.bounds() 852 min = page.makeValue(minFloat32) 853 max = page.makeValue(maxFloat32) 854 } 855 return min, max, ok 856 } 857 858 func (page *floatPage) Slice(i, j int64) Page { 859 return &floatPage{ 860 typ: page.typ, 861 values: page.values[i:j], 862 columnIndex: page.columnIndex, 863 } 864 } 865 866 func (page *floatPage) makeValue(v float32) Value { 867 value := makeValueFloat(v) 868 value.columnIndex = page.columnIndex 869 return value 870 } 871 872 type doublePage struct { 873 typ Type 874 values []float64 875 columnIndex int16 876 } 877 878 func newDoublePage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *doublePage { 879 return &doublePage{ 880 typ: typ, 881 values: values.Double()[:numValues], 882 columnIndex: ^columnIndex, 883 } 884 } 885 886 func (page *doublePage) Type() Type { return page.typ } 887 888 func (page *doublePage) Column() int { return int(^page.columnIndex) } 889 890 func (page *doublePage) Dictionary() Dictionary { return nil } 891 892 func (page *doublePage) NumRows() int64 { return int64(len(page.values)) } 893 894 func (page *doublePage) NumValues() int64 { return int64(len(page.values)) } 895 896 func (page *doublePage) NumNulls() int64 { return 0 } 897 898 func (page *doublePage) Size() int64 { return 8 * int64(len(page.values)) } 899 900 func (page *doublePage) RepetitionLevels() []byte { return nil } 901 902 func (page *doublePage) DefinitionLevels() []byte { return nil } 903 904 func (page *doublePage) Data() encoding.Values { return encoding.DoubleValues(page.values) } 905 906 func (page *doublePage) Values() ValueReader { return &doublePageValues{page: page} } 907 908 func (page *doublePage) min() float64 { return minFloat64(page.values) } 909 910 func (page *doublePage) max() float64 { return maxFloat64(page.values) } 911 912 func (page *doublePage) bounds() (min, max float64) { return 
boundsFloat64(page.values) } 913 914 func (page *doublePage) Bounds() (min, max Value, ok bool) { 915 if ok = len(page.values) > 0; ok { 916 minFloat64, maxFloat64 := page.bounds() 917 min = page.makeValue(minFloat64) 918 max = page.makeValue(maxFloat64) 919 } 920 return min, max, ok 921 } 922 923 func (page *doublePage) Slice(i, j int64) Page { 924 return &doublePage{ 925 typ: page.typ, 926 values: page.values[i:j], 927 columnIndex: page.columnIndex, 928 } 929 } 930 931 func (page *doublePage) makeValue(v float64) Value { 932 value := makeValueDouble(v) 933 value.columnIndex = page.columnIndex 934 return value 935 } 936 937 type byteArrayPage struct { 938 typ Type 939 values []byte 940 offsets []uint32 941 columnIndex int16 942 } 943 944 func newByteArrayPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *byteArrayPage { 945 data, offsets := values.ByteArray() 946 return &byteArrayPage{ 947 typ: typ, 948 values: data, 949 offsets: offsets[:numValues+1], 950 columnIndex: ^columnIndex, 951 } 952 } 953 954 func (page *byteArrayPage) Type() Type { return page.typ } 955 956 func (page *byteArrayPage) Column() int { return int(^page.columnIndex) } 957 958 func (page *byteArrayPage) Dictionary() Dictionary { return nil } 959 960 func (page *byteArrayPage) NumRows() int64 { return int64(page.len()) } 961 962 func (page *byteArrayPage) NumValues() int64 { return int64(page.len()) } 963 964 func (page *byteArrayPage) NumNulls() int64 { return 0 } 965 966 func (page *byteArrayPage) Size() int64 { return int64(len(page.values)) + 4*int64(len(page.offsets)) } 967 968 func (page *byteArrayPage) RepetitionLevels() []byte { return nil } 969 970 func (page *byteArrayPage) DefinitionLevels() []byte { return nil } 971 972 func (page *byteArrayPage) Data() encoding.Values { 973 return encoding.ByteArrayValues(page.values, page.offsets) 974 } 975 976 func (page *byteArrayPage) Values() ValueReader { return &byteArrayPageValues{page: page} } 977 978 func (page 
*byteArrayPage) len() int { return len(page.offsets) - 1 } 979 980 func (page *byteArrayPage) index(i int) []byte { 981 j := page.offsets[i+0] 982 k := page.offsets[i+1] 983 return page.values[j:k:k] 984 } 985 986 func (page *byteArrayPage) min() (min []byte) { 987 if n := page.len(); n > 0 { 988 min = page.index(0) 989 990 for i := 1; i < n; i++ { 991 v := page.index(i) 992 993 if bytes.Compare(v, min) < 0 { 994 min = v 995 } 996 } 997 } 998 return min 999 } 1000 1001 func (page *byteArrayPage) max() (max []byte) { 1002 if n := page.len(); n > 0 { 1003 max = page.index(0) 1004 1005 for i := 1; i < n; i++ { 1006 v := page.index(i) 1007 1008 if bytes.Compare(v, max) > 0 { 1009 max = v 1010 } 1011 } 1012 } 1013 return max 1014 } 1015 1016 func (page *byteArrayPage) bounds() (min, max []byte) { 1017 if n := page.len(); n > 0 { 1018 min = page.index(0) 1019 max = min 1020 1021 for i := 1; i < n; i++ { 1022 v := page.index(i) 1023 1024 switch { 1025 case bytes.Compare(v, min) < 0: 1026 min = v 1027 case bytes.Compare(v, max) > 0: 1028 max = v 1029 } 1030 } 1031 } 1032 return min, max 1033 } 1034 1035 func (page *byteArrayPage) Bounds() (min, max Value, ok bool) { 1036 if ok = len(page.offsets) > 1; ok { 1037 minBytes, maxBytes := page.bounds() 1038 min = page.makeValueBytes(minBytes) 1039 max = page.makeValueBytes(maxBytes) 1040 } 1041 return min, max, ok 1042 } 1043 1044 func (page *byteArrayPage) cloneValues() []byte { 1045 values := make([]byte, len(page.values)) 1046 copy(values, page.values) 1047 return values 1048 } 1049 1050 func (page *byteArrayPage) cloneOffsets() []uint32 { 1051 offsets := make([]uint32, len(page.offsets)) 1052 copy(offsets, page.offsets) 1053 return offsets 1054 } 1055 1056 func (page *byteArrayPage) Slice(i, j int64) Page { 1057 return &byteArrayPage{ 1058 typ: page.typ, 1059 values: page.values, 1060 offsets: page.offsets[i : j+1], 1061 columnIndex: page.columnIndex, 1062 } 1063 } 1064 1065 func (page *byteArrayPage) makeValueBytes(v 
[]byte) Value { 1066 value := makeValueBytes(ByteArray, v) 1067 value.columnIndex = page.columnIndex 1068 return value 1069 } 1070 1071 func (page *byteArrayPage) makeValueString(v string) Value { 1072 value := makeValueString(ByteArray, v) 1073 value.columnIndex = page.columnIndex 1074 return value 1075 } 1076 1077 type fixedLenByteArrayPage struct { 1078 typ Type 1079 data []byte 1080 size int 1081 columnIndex int16 1082 } 1083 1084 func newFixedLenByteArrayPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *fixedLenByteArrayPage { 1085 data, size := values.FixedLenByteArray() 1086 return &fixedLenByteArrayPage{ 1087 typ: typ, 1088 data: data[:int(numValues)*size], 1089 size: size, 1090 columnIndex: ^columnIndex, 1091 } 1092 } 1093 1094 func (page *fixedLenByteArrayPage) Type() Type { return page.typ } 1095 1096 func (page *fixedLenByteArrayPage) Column() int { return int(^page.columnIndex) } 1097 1098 func (page *fixedLenByteArrayPage) Dictionary() Dictionary { return nil } 1099 1100 func (page *fixedLenByteArrayPage) NumRows() int64 { return int64(len(page.data) / page.size) } 1101 1102 func (page *fixedLenByteArrayPage) NumValues() int64 { return int64(len(page.data) / page.size) } 1103 1104 func (page *fixedLenByteArrayPage) NumNulls() int64 { return 0 } 1105 1106 func (page *fixedLenByteArrayPage) Size() int64 { return int64(len(page.data)) } 1107 1108 func (page *fixedLenByteArrayPage) RepetitionLevels() []byte { return nil } 1109 1110 func (page *fixedLenByteArrayPage) DefinitionLevels() []byte { return nil } 1111 1112 func (page *fixedLenByteArrayPage) Data() encoding.Values { 1113 return encoding.FixedLenByteArrayValues(page.data, page.size) 1114 } 1115 1116 func (page *fixedLenByteArrayPage) Values() ValueReader { 1117 return &fixedLenByteArrayPageValues{page: page} 1118 } 1119 1120 func (page *fixedLenByteArrayPage) min() []byte { return minFixedLenByteArray(page.data, page.size) } 1121 1122 func (page *fixedLenByteArrayPage) 
max() []byte { return maxFixedLenByteArray(page.data, page.size) } 1123 1124 func (page *fixedLenByteArrayPage) bounds() (min, max []byte) { 1125 return boundsFixedLenByteArray(page.data, page.size) 1126 } 1127 1128 func (page *fixedLenByteArrayPage) Bounds() (min, max Value, ok bool) { 1129 if ok = len(page.data) > 0; ok { 1130 minBytes, maxBytes := page.bounds() 1131 min = page.makeValueBytes(minBytes) 1132 max = page.makeValueBytes(maxBytes) 1133 } 1134 return min, max, ok 1135 } 1136 1137 func (page *fixedLenByteArrayPage) Slice(i, j int64) Page { 1138 return &fixedLenByteArrayPage{ 1139 typ: page.typ, 1140 data: page.data[i*int64(page.size) : j*int64(page.size)], 1141 size: page.size, 1142 columnIndex: page.columnIndex, 1143 } 1144 } 1145 1146 func (page *fixedLenByteArrayPage) makeValueBytes(v []byte) Value { 1147 value := makeValueBytes(FixedLenByteArray, v) 1148 value.columnIndex = page.columnIndex 1149 return value 1150 } 1151 1152 func (page *fixedLenByteArrayPage) makeValueString(v string) Value { 1153 value := makeValueString(FixedLenByteArray, v) 1154 value.columnIndex = page.columnIndex 1155 return value 1156 } 1157 1158 type uint32Page struct { 1159 typ Type 1160 values []uint32 1161 columnIndex int16 1162 } 1163 1164 func newUint32Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *uint32Page { 1165 return &uint32Page{ 1166 typ: typ, 1167 values: values.Uint32()[:numValues], 1168 columnIndex: ^columnIndex, 1169 } 1170 } 1171 1172 func (page *uint32Page) Type() Type { return page.typ } 1173 1174 func (page *uint32Page) Column() int { return int(^page.columnIndex) } 1175 1176 func (page *uint32Page) Dictionary() Dictionary { return nil } 1177 1178 func (page *uint32Page) NumRows() int64 { return int64(len(page.values)) } 1179 1180 func (page *uint32Page) NumValues() int64 { return int64(len(page.values)) } 1181 1182 func (page *uint32Page) NumNulls() int64 { return 0 } 1183 1184 func (page *uint32Page) Size() int64 { return 4 * 
int64(len(page.values)) } 1185 1186 func (page *uint32Page) RepetitionLevels() []byte { return nil } 1187 1188 func (page *uint32Page) DefinitionLevels() []byte { return nil } 1189 1190 func (page *uint32Page) Data() encoding.Values { return encoding.Uint32Values(page.values) } 1191 1192 func (page *uint32Page) Values() ValueReader { return &uint32PageValues{page: page} } 1193 1194 func (page *uint32Page) min() uint32 { return minUint32(page.values) } 1195 1196 func (page *uint32Page) max() uint32 { return maxUint32(page.values) } 1197 1198 func (page *uint32Page) bounds() (min, max uint32) { return boundsUint32(page.values) } 1199 1200 func (page *uint32Page) Bounds() (min, max Value, ok bool) { 1201 if ok = len(page.values) > 0; ok { 1202 minUint32, maxUint32 := page.bounds() 1203 min = page.makeValue(minUint32) 1204 max = page.makeValue(maxUint32) 1205 } 1206 return min, max, ok 1207 } 1208 1209 func (page *uint32Page) Slice(i, j int64) Page { 1210 return &uint32Page{ 1211 typ: page.typ, 1212 values: page.values[i:j], 1213 columnIndex: page.columnIndex, 1214 } 1215 } 1216 1217 func (page *uint32Page) makeValue(v uint32) Value { 1218 value := makeValueUint32(v) 1219 value.columnIndex = page.columnIndex 1220 return value 1221 } 1222 1223 type uint64Page struct { 1224 typ Type 1225 values []uint64 1226 columnIndex int16 1227 } 1228 1229 func newUint64Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *uint64Page { 1230 return &uint64Page{ 1231 typ: typ, 1232 values: values.Uint64()[:numValues], 1233 columnIndex: ^columnIndex, 1234 } 1235 } 1236 1237 func (page *uint64Page) Type() Type { return page.typ } 1238 1239 func (page *uint64Page) Column() int { return int(^page.columnIndex) } 1240 1241 func (page *uint64Page) Dictionary() Dictionary { return nil } 1242 1243 func (page *uint64Page) NumRows() int64 { return int64(len(page.values)) } 1244 1245 func (page *uint64Page) NumValues() int64 { return int64(len(page.values)) } 1246 1247 func 
(page *uint64Page) NumNulls() int64 { return 0 } 1248 1249 func (page *uint64Page) Size() int64 { return 8 * int64(len(page.values)) } 1250 1251 func (page *uint64Page) RepetitionLevels() []byte { return nil } 1252 1253 func (page *uint64Page) DefinitionLevels() []byte { return nil } 1254 1255 func (page *uint64Page) Data() encoding.Values { return encoding.Uint64Values(page.values) } 1256 1257 func (page *uint64Page) Values() ValueReader { return &uint64PageValues{page: page} } 1258 1259 func (page *uint64Page) min() uint64 { return minUint64(page.values) } 1260 1261 func (page *uint64Page) max() uint64 { return maxUint64(page.values) } 1262 1263 func (page *uint64Page) bounds() (min, max uint64) { return boundsUint64(page.values) } 1264 1265 func (page *uint64Page) Bounds() (min, max Value, ok bool) { 1266 if ok = len(page.values) > 0; ok { 1267 minUint64, maxUint64 := page.bounds() 1268 min = page.makeValue(minUint64) 1269 max = page.makeValue(maxUint64) 1270 } 1271 return min, max, ok 1272 } 1273 1274 func (page *uint64Page) Slice(i, j int64) Page { 1275 return &uint64Page{ 1276 typ: page.typ, 1277 values: page.values[i:j], 1278 columnIndex: page.columnIndex, 1279 } 1280 } 1281 1282 func (page *uint64Page) makeValue(v uint64) Value { 1283 value := makeValueUint64(v) 1284 value.columnIndex = page.columnIndex 1285 return value 1286 } 1287 1288 type be128Page struct { 1289 typ Type 1290 values [][16]byte 1291 columnIndex int16 1292 } 1293 1294 func newBE128Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *be128Page { 1295 return &be128Page{ 1296 typ: typ, 1297 values: values.Uint128()[:numValues], 1298 columnIndex: ^columnIndex, 1299 } 1300 } 1301 1302 func (page *be128Page) Type() Type { return page.typ } 1303 1304 func (page *be128Page) Column() int { return int(^page.columnIndex) } 1305 1306 func (page *be128Page) Dictionary() Dictionary { return nil } 1307 1308 func (page *be128Page) NumRows() int64 { return int64(len(page.values)) } 
1309 1310 func (page *be128Page) NumValues() int64 { return int64(len(page.values)) } 1311 1312 func (page *be128Page) NumNulls() int64 { return 0 } 1313 1314 func (page *be128Page) Size() int64 { return 16 * int64(len(page.values)) } 1315 1316 func (page *be128Page) RepetitionLevels() []byte { return nil } 1317 1318 func (page *be128Page) DefinitionLevels() []byte { return nil } 1319 1320 func (page *be128Page) Data() encoding.Values { return encoding.Uint128Values(page.values) } 1321 1322 func (page *be128Page) Values() ValueReader { return &be128PageValues{page: page} } 1323 1324 func (page *be128Page) min() []byte { return minBE128(page.values) } 1325 1326 func (page *be128Page) max() []byte { return maxBE128(page.values) } 1327 1328 func (page *be128Page) bounds() (min, max []byte) { return boundsBE128(page.values) } 1329 1330 func (page *be128Page) Bounds() (min, max Value, ok bool) { 1331 if ok = len(page.values) > 0; ok { 1332 minBytes, maxBytes := page.bounds() 1333 min = page.makeValueBytes(minBytes) 1334 max = page.makeValueBytes(maxBytes) 1335 } 1336 return min, max, ok 1337 } 1338 1339 func (page *be128Page) Slice(i, j int64) Page { 1340 return &be128Page{ 1341 typ: page.typ, 1342 values: page.values[i:j], 1343 columnIndex: page.columnIndex, 1344 } 1345 } 1346 1347 func (page *be128Page) makeValue(v *[16]byte) Value { 1348 return page.makeValueBytes(v[:]) 1349 } 1350 1351 func (page *be128Page) makeValueBytes(v []byte) Value { 1352 value := makeValueBytes(FixedLenByteArray, v) 1353 value.columnIndex = page.columnIndex 1354 return value 1355 } 1356 1357 func (page *be128Page) makeValueString(v string) Value { 1358 value := makeValueString(FixedLenByteArray, v) 1359 value.columnIndex = page.columnIndex 1360 return value 1361 } 1362 1363 type nullPage struct { 1364 typ Type 1365 column int 1366 count int 1367 } 1368 1369 func newNullPage(typ Type, columnIndex int16, numValues int32) *nullPage { 1370 return &nullPage{ 1371 typ: typ, 1372 column: 
int(columnIndex), 1373 count: int(numValues), 1374 } 1375 } 1376 1377 func (page *nullPage) Type() Type { return page.typ } 1378 func (page *nullPage) Column() int { return page.column } 1379 func (page *nullPage) Dictionary() Dictionary { return nil } 1380 func (page *nullPage) NumRows() int64 { return int64(page.count) } 1381 func (page *nullPage) NumValues() int64 { return int64(page.count) } 1382 func (page *nullPage) NumNulls() int64 { return int64(page.count) } 1383 func (page *nullPage) Bounds() (min, max Value, ok bool) { return } 1384 func (page *nullPage) Size() int64 { return 1 } 1385 func (page *nullPage) Values() ValueReader { 1386 return &nullPageValues{column: page.column, remain: page.count} 1387 } 1388 func (page *nullPage) Slice(i, j int64) Page { 1389 return &nullPage{column: page.column, count: page.count - int(j-i)} 1390 } 1391 func (page *nullPage) RepetitionLevels() []byte { return nil } 1392 func (page *nullPage) DefinitionLevels() []byte { return nil } 1393 func (page *nullPage) Data() encoding.Values { return encoding.Values{} }