github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/column_buffer.go (about) 1 package parquet 2 3 import ( 4 "bytes" 5 "fmt" 6 "io" 7 "sort" 8 "unsafe" 9 10 "github.com/segmentio/parquet-go/deprecated" 11 "github.com/segmentio/parquet-go/encoding/plain" 12 "github.com/segmentio/parquet-go/internal/bitpack" 13 "github.com/segmentio/parquet-go/internal/unsafecast" 14 "github.com/segmentio/parquet-go/sparse" 15 ) 16 17 // ColumnBuffer is an interface representing columns of a row group. 18 // 19 // ColumnBuffer implements sort.Interface as a way to support reordering the 20 // rows that have been written to it. 21 // 22 // The current implementation has a limitation which prevents applications from 23 // providing custom versions of this interface because it contains unexported 24 // methods. The only way to create ColumnBuffer values is to call the 25 // NewColumnBuffer of Type instances. This limitation may be lifted in future 26 // releases. 27 type ColumnBuffer interface { 28 // Exposes a read-only view of the column buffer. 29 ColumnChunk 30 31 // The column implements ValueReaderAt as a mechanism to read values at 32 // specific locations within the buffer. 33 ValueReaderAt 34 35 // The column implements ValueWriter as a mechanism to optimize the copy 36 // of values into the buffer in contexts where the row information is 37 // provided by the values because the repetition and definition levels 38 // are set. 39 ValueWriter 40 41 // For indexed columns, returns the underlying dictionary holding the column 42 // values. If the column is not indexed, nil is returned. 43 Dictionary() Dictionary 44 45 // Returns a copy of the column. The returned copy shares no memory with 46 // the original, mutations of either column will not modify the other. 47 Clone() ColumnBuffer 48 49 // Returns the column as a Page. 50 Page() Page 51 52 // Clears all rows written to the column. 53 Reset() 54 55 // Returns the current capacity of the column (rows). 56 Cap() int 57 58 // Returns the number of rows currently written to the column. 59 Len() int 60 61 // Compares rows at index i and j and reports whether i < j. 62 Less(i, j int) bool 63 64 // Swaps rows at index i and j. 65 Swap(i, j int) 66 67 // Returns the size of the column buffer in bytes. 68 Size() int64 69 70 // This method is employed to write rows from arrays of Go values into the 71 // column buffer. The method is currently unexported because it uses unsafe 72 // APIs which would be difficult for applications to leverage, increasing 73 // the risk of introducing bugs in the code. As a consequence, applications 74 // cannot use custom implementations of the ColumnBuffer interface since 75 // they cannot declare an unexported method that would match this signature. 76 // It means that in order to create a ColumnBuffer value, programs need to 77 // go through a call to NewColumnBuffer on a Type instance. We make this 78 // trade off for now as it is preferrable to optimize for safety over 79 // extensibility in the public APIs, we might revisit in the future if we 80 // learn about valid use cases for custom column buffer types. 81 writeValues(rows sparse.Array, levels columnLevels) 82 } 83 84 type columnLevels struct { 85 repetitionDepth byte 86 repetitionLevel byte 87 definitionLevel byte 88 } 89 90 func columnIndexOfNullable(base ColumnBuffer, maxDefinitionLevel byte, definitionLevels []byte) ColumnIndex { 91 return &nullableColumnIndex{ 92 ColumnIndex: base.ColumnIndex(), 93 maxDefinitionLevel: maxDefinitionLevel, 94 definitionLevels: definitionLevels, 95 } 96 } 97 98 type nullableColumnIndex struct { 99 ColumnIndex 100 maxDefinitionLevel byte 101 definitionLevels []byte 102 } 103 104 func (index *nullableColumnIndex) NullPage(i int) bool { 105 return index.NullCount(i) == int64(len(index.definitionLevels)) 106 } 107 108 func (index *nullableColumnIndex) NullCount(i int) int64 { 109 return int64(countLevelsNotEqual(index.definitionLevels, index.maxDefinitionLevel)) 110 } 111 112 type nullOrdering func(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool 113 114 func nullsGoFirst(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool { 115 if definitionLevel1 != maxDefinitionLevel { 116 return definitionLevel2 == maxDefinitionLevel 117 } else { 118 return definitionLevel2 == maxDefinitionLevel && column.Less(i, j) 119 } 120 } 121 122 func nullsGoLast(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool { 123 return definitionLevel1 == maxDefinitionLevel && (definitionLevel2 != maxDefinitionLevel || column.Less(i, j)) 124 } 125 126 // reversedColumnBuffer is an adapter of ColumnBuffer which inverses the order 127 // in which rows are ordered when the column gets sorted. 128 // 129 // This type is used when buffers are constructed with sorting columns ordering 130 // values in descending order. 131 type reversedColumnBuffer struct{ ColumnBuffer } 132 133 func (col *reversedColumnBuffer) Less(i, j int) bool { return col.ColumnBuffer.Less(j, i) } 134 135 // optionalColumnBuffer is an implementation of the ColumnBuffer interface used 136 // as a wrapper to an underlying ColumnBuffer to manage the creation of 137 // definition levels. 138 // 139 // Null values are not written to the underlying column; instead, the buffer 140 // tracks offsets of row values in the column, null row values are represented 141 // by the value -1 and a definition level less than the max. 142 // 143 // This column buffer type is used for all leaf columns that have a non-zero 144 // max definition level and a zero repetition level, which may be because the 145 // column or one of its parent(s) are marked optional. 146 type optionalColumnBuffer struct { 147 base ColumnBuffer 148 reordered bool 149 maxDefinitionLevel byte 150 rows []int32 151 sortIndex []int32 152 definitionLevels []byte 153 nullOrdering nullOrdering 154 } 155 156 func newOptionalColumnBuffer(base ColumnBuffer, maxDefinitionLevel byte, nullOrdering nullOrdering) *optionalColumnBuffer { 157 n := base.Cap() 158 return &optionalColumnBuffer{ 159 base: base, 160 maxDefinitionLevel: maxDefinitionLevel, 161 rows: make([]int32, 0, n), 162 definitionLevels: make([]byte, 0, n), 163 nullOrdering: nullOrdering, 164 } 165 } 166 167 func (col *optionalColumnBuffer) Clone() ColumnBuffer { 168 return &optionalColumnBuffer{ 169 base: col.base.Clone(), 170 reordered: col.reordered, 171 maxDefinitionLevel: col.maxDefinitionLevel, 172 rows: append([]int32{}, col.rows...), 173 definitionLevels: append([]byte{}, col.definitionLevels...), 174 nullOrdering: col.nullOrdering, 175 } 176 } 177 178 func (col *optionalColumnBuffer) Type() Type { 179 return col.base.Type() 180 } 181 182 func (col *optionalColumnBuffer) NumValues() int64 { 183 return int64(len(col.definitionLevels)) 184 } 185 186 func (col *optionalColumnBuffer) ColumnIndex() ColumnIndex { 187 return columnIndexOfNullable(col.base, col.maxDefinitionLevel, col.definitionLevels) 188 } 189 190 func (col *optionalColumnBuffer) OffsetIndex() OffsetIndex { 191 return col.base.OffsetIndex() 192 } 193 194 func (col *optionalColumnBuffer) BloomFilter() BloomFilter { 195 return col.base.BloomFilter() 196 } 197 198 func (col *optionalColumnBuffer) Dictionary() Dictionary { 199 return col.base.Dictionary() 200 } 201 202 func (col *optionalColumnBuffer) Column() int { 203 return col.base.Column() 204 } 205 206 func (col *optionalColumnBuffer) Pages() Pages { 207 return onePage(col.Page()) 208 } 209 210 func (col *optionalColumnBuffer) Page() Page { 211 // No need for any cyclic sorting if the rows have not been reordered. 212 // This case is also important because the cyclic sorting modifies the 213 // buffer which makes it unsafe to read the buffer concurrently. 214 if col.reordered { 215 numNulls := countLevelsNotEqual(col.definitionLevels, col.maxDefinitionLevel) 216 numValues := len(col.rows) - numNulls 217 218 if numValues > 0 { 219 if cap(col.sortIndex) < numValues { 220 col.sortIndex = make([]int32, numValues) 221 } 222 sortIndex := col.sortIndex[:numValues] 223 i := 0 224 for _, j := range col.rows { 225 if j >= 0 { 226 sortIndex[j] = int32(i) 227 i++ 228 } 229 } 230 231 // Cyclic sort: O(N) 232 for i := range sortIndex { 233 for j := int(sortIndex[i]); i != j; j = int(sortIndex[i]) { 234 col.base.Swap(i, j) 235 sortIndex[i], sortIndex[j] = sortIndex[j], sortIndex[i] 236 } 237 } 238 } 239 240 i := 0 241 for _, r := range col.rows { 242 if r >= 0 { 243 col.rows[i] = int32(i) 244 i++ 245 } 246 } 247 248 col.reordered = false 249 } 250 251 return newOptionalPage(col.base.Page(), col.maxDefinitionLevel, col.definitionLevels) 252 } 253 254 func (col *optionalColumnBuffer) Reset() { 255 col.base.Reset() 256 col.rows = col.rows[:0] 257 col.definitionLevels = col.definitionLevels[:0] 258 } 259 260 func (col *optionalColumnBuffer) Size() int64 { 261 return int64(4*len(col.rows)+4*len(col.sortIndex)+len(col.definitionLevels)) + col.base.Size() 262 } 263 264 func (col *optionalColumnBuffer) Cap() int { return cap(col.rows) } 265 266 func (col *optionalColumnBuffer) Len() int { return len(col.rows) } 267 268 func (col *optionalColumnBuffer) Less(i, j int) bool { 269 return col.nullOrdering( 270 col.base, 271 int(col.rows[i]), 272 int(col.rows[j]), 273 col.maxDefinitionLevel, 274 col.definitionLevels[i], 275 col.definitionLevels[j], 276 ) 277 } 278 279 func (col *optionalColumnBuffer) Swap(i, j int) { 280 // Because the underlying column does not contain null values, we cannot 281 // swap its values at indexes i and j. We swap the row indexes only, then 282 // reorder the underlying buffer using a cyclic sort when the buffer is 283 // materialized into a page view. 284 col.reordered = true 285 col.rows[i], col.rows[j] = col.rows[j], col.rows[i] 286 col.definitionLevels[i], col.definitionLevels[j] = col.definitionLevels[j], col.definitionLevels[i] 287 } 288 289 func (col *optionalColumnBuffer) WriteValues(values []Value) (n int, err error) { 290 rowIndex := int32(col.base.Len()) 291 292 for n < len(values) { 293 // Collect index range of contiguous null values, from i to n. If this 294 // for loop exhausts the values, all remaining if statements and for 295 // loops will be no-ops and the loop will terminate. 296 i := n 297 for n < len(values) && values[n].definitionLevel != col.maxDefinitionLevel { 298 n++ 299 } 300 301 // Write the contiguous null values up until the first non-null value 302 // obtained in the for loop above. 303 for _, v := range values[i:n] { 304 col.rows = append(col.rows, -1) 305 col.definitionLevels = append(col.definitionLevels, v.definitionLevel) 306 } 307 308 // Collect index range of contiguous non-null values, from i to n. 309 i = n 310 for n < len(values) && values[n].definitionLevel == col.maxDefinitionLevel { 311 n++ 312 } 313 314 // As long as i < n we have non-null values still to write. It is 315 // possible that we just exhausted the input values in which case i == n 316 // and the outer for loop will terminate. 317 if i < n { 318 count, err := col.base.WriteValues(values[i:n]) 319 col.definitionLevels = appendLevel(col.definitionLevels, col.maxDefinitionLevel, count) 320 321 for count > 0 { 322 col.rows = append(col.rows, rowIndex) 323 rowIndex++ 324 count-- 325 } 326 327 if err != nil { 328 return n, err 329 } 330 } 331 } 332 return n, nil 333 } 334 335 func (col *optionalColumnBuffer) writeValues(rows sparse.Array, levels columnLevels) { 336 // The row count is zero when writing an null optional value, in which case 337 // we still need to output a row to the buffer to record the definition 338 // level. 339 if rows.Len() == 0 { 340 col.definitionLevels = append(col.definitionLevels, levels.definitionLevel) 341 col.rows = append(col.rows, -1) 342 return 343 } 344 345 col.definitionLevels = appendLevel(col.definitionLevels, levels.definitionLevel, rows.Len()) 346 347 i := len(col.rows) 348 j := len(col.rows) + rows.Len() 349 350 if j <= cap(col.rows) { 351 col.rows = col.rows[:j] 352 } else { 353 tmp := make([]int32, j, 2*j) 354 copy(tmp, col.rows) 355 col.rows = tmp 356 } 357 358 if levels.definitionLevel != col.maxDefinitionLevel { 359 broadcastValueInt32(col.rows[i:], -1) 360 } else { 361 broadcastRangeInt32(col.rows[i:], int32(col.base.Len())) 362 col.base.writeValues(rows, levels) 363 } 364 } 365 366 func (col *optionalColumnBuffer) ReadValuesAt(values []Value, offset int64) (int, error) { 367 length := int64(len(col.definitionLevels)) 368 if offset < 0 { 369 return 0, errRowIndexOutOfBounds(offset, length) 370 } 371 if offset >= length { 372 return 0, io.EOF 373 } 374 if length -= offset; length < int64(len(values)) { 375 values = values[:length] 376 } 377 378 numNulls1 := int64(countLevelsNotEqual(col.definitionLevels[:offset], col.maxDefinitionLevel)) 379 numNulls2 := int64(countLevelsNotEqual(col.definitionLevels[offset:offset+length], col.maxDefinitionLevel)) 380 381 if numNulls2 < length { 382 n, err := col.base.ReadValuesAt(values[:length-numNulls2], offset-numNulls1) 383 if err != nil { 384 return n, err 385 } 386 } 387 388 if numNulls2 > 0 { 389 columnIndex := ^int16(col.Column()) 390 i := numNulls2 - 1 391 j := length - 1 392 definitionLevels := col.definitionLevels[offset : offset+length] 393 maxDefinitionLevel := col.maxDefinitionLevel 394 395 for n := len(definitionLevels) - 1; n >= 0 && j > i; n-- { 396 if definitionLevels[n] != maxDefinitionLevel { 397 values[j] = Value{definitionLevel: definitionLevels[n], columnIndex: columnIndex} 398 } else { 399 values[j] = values[i] 400 i-- 401 } 402 j-- 403 } 404 } 405 406 return int(length), nil 407 } 408 409 // repeatedColumnBuffer is an implementation of the ColumnBuffer interface used 410 // as a wrapper to an underlying ColumnBuffer to manage the creation of 411 // repetition levels, definition levels, and map rows to the region of the 412 // underlying buffer that contains their sequence of values. 413 // 414 // Null values are not written to the underlying column; instead, the buffer 415 // tracks offsets of row values in the column, null row values are represented 416 // by the value -1 and a definition level less than the max. 417 // 418 // This column buffer type is used for all leaf columns that have a non-zero 419 // max repetition level, which may be because the column or one of its parent(s) 420 // are marked repeated. 421 type repeatedColumnBuffer struct { 422 base ColumnBuffer 423 reordered bool 424 maxRepetitionLevel byte 425 maxDefinitionLevel byte 426 rows []offsetMapping 427 repetitionLevels []byte 428 definitionLevels []byte 429 buffer []Value 430 reordering *repeatedColumnBuffer 431 nullOrdering nullOrdering 432 } 433 434 // The offsetMapping type maps the logical offset of rows within the repetition 435 // and definition levels, to the base offsets in the underlying column buffers 436 // where the non-null values have been written. 437 type offsetMapping struct { 438 offset uint32 439 baseOffset uint32 440 } 441 442 func newRepeatedColumnBuffer(base ColumnBuffer, maxRepetitionLevel, maxDefinitionLevel byte, nullOrdering nullOrdering) *repeatedColumnBuffer { 443 n := base.Cap() 444 return &repeatedColumnBuffer{ 445 base: base, 446 maxRepetitionLevel: maxRepetitionLevel, 447 maxDefinitionLevel: maxDefinitionLevel, 448 rows: make([]offsetMapping, 0, n/8), 449 repetitionLevels: make([]byte, 0, n), 450 definitionLevels: make([]byte, 0, n), 451 nullOrdering: nullOrdering, 452 } 453 } 454 455 func (col *repeatedColumnBuffer) Clone() ColumnBuffer { 456 return &repeatedColumnBuffer{ 457 base: col.base.Clone(), 458 reordered: col.reordered, 459 maxRepetitionLevel: col.maxRepetitionLevel, 460 maxDefinitionLevel: col.maxDefinitionLevel, 461 rows: append([]offsetMapping{}, col.rows...), 462 repetitionLevels: append([]byte{}, col.repetitionLevels...), 463 definitionLevels: append([]byte{}, col.definitionLevels...), 464 nullOrdering: col.nullOrdering, 465 } 466 } 467 468 func (col *repeatedColumnBuffer) Type() Type { 469 return col.base.Type() 470 } 471 472 func (col *repeatedColumnBuffer) NumValues() int64 { 473 return int64(len(col.definitionLevels)) 474 } 475 476 func (col *repeatedColumnBuffer) ColumnIndex() ColumnIndex { 477 return columnIndexOfNullable(col.base, col.maxDefinitionLevel, col.definitionLevels) 478 } 479 480 func (col *repeatedColumnBuffer) OffsetIndex() OffsetIndex { 481 return col.base.OffsetIndex() 482 } 483 484 func (col *repeatedColumnBuffer) BloomFilter() BloomFilter { 485 return col.base.BloomFilter() 486 } 487 488 func (col *repeatedColumnBuffer) Dictionary() Dictionary { 489 return col.base.Dictionary() 490 } 491 492 func (col *repeatedColumnBuffer) Column() int { 493 return col.base.Column() 494 } 495 496 func (col *repeatedColumnBuffer) Pages() Pages { 497 return onePage(col.Page()) 498 } 499 500 func (col *repeatedColumnBuffer) Page() Page { 501 if col.reordered { 502 if col.reordering == nil { 503 col.reordering = col.Clone().(*repeatedColumnBuffer) 504 } 505 506 column := col.reordering 507 column.Reset() 508 maxNumValues := 0 509 defer func() { 510 clearValues(col.buffer[:maxNumValues]) 511 }() 512 513 baseOffset := 0 514 515 for _, row := range col.rows { 516 rowOffset := int(row.offset) 517 rowLength := repeatedRowLength(col.repetitionLevels[rowOffset:]) 518 numNulls := countLevelsNotEqual(col.definitionLevels[rowOffset:rowOffset+rowLength], col.maxDefinitionLevel) 519 numValues := rowLength - numNulls 520 521 if numValues > 0 { 522 if numValues > cap(col.buffer) { 523 col.buffer = make([]Value, numValues) 524 } else { 525 col.buffer = col.buffer[:numValues] 526 } 527 n, err := col.base.ReadValuesAt(col.buffer, int64(row.baseOffset)) 528 if err != nil && n < numValues { 529 return newErrorPage(col.Type(), col.Column(), "reordering rows of repeated column: %w", err) 530 } 531 if _, err := column.base.WriteValues(col.buffer); err != nil { 532 return newErrorPage(col.Type(), col.Column(), "reordering rows of repeated column: %w", err) 533 } 534 if numValues > maxNumValues { 535 maxNumValues = numValues 536 } 537 } 538 539 column.rows = append(column.rows, offsetMapping{ 540 offset: uint32(len(column.repetitionLevels)), 541 baseOffset: uint32(baseOffset), 542 }) 543 544 column.repetitionLevels = append(column.repetitionLevels, col.repetitionLevels[rowOffset:rowOffset+rowLength]...) 545 column.definitionLevels = append(column.definitionLevels, col.definitionLevels[rowOffset:rowOffset+rowLength]...) 546 baseOffset += numValues 547 } 548 549 col.swapReorderingBuffer(column) 550 col.reordered = false 551 } 552 553 return newRepeatedPage( 554 col.base.Page(), 555 col.maxRepetitionLevel, 556 col.maxDefinitionLevel, 557 col.repetitionLevels, 558 col.definitionLevels, 559 ) 560 } 561 562 func (col *repeatedColumnBuffer) swapReorderingBuffer(buf *repeatedColumnBuffer) { 563 col.base, buf.base = buf.base, col.base 564 col.rows, buf.rows = buf.rows, col.rows 565 col.repetitionLevels, buf.repetitionLevels = buf.repetitionLevels, col.repetitionLevels 566 col.definitionLevels, buf.definitionLevels = buf.definitionLevels, col.definitionLevels 567 } 568 569 func (col *repeatedColumnBuffer) Reset() { 570 col.base.Reset() 571 col.rows = col.rows[:0] 572 col.repetitionLevels = col.repetitionLevels[:0] 573 col.definitionLevels = col.definitionLevels[:0] 574 } 575 576 func (col *repeatedColumnBuffer) Size() int64 { 577 return int64(8*len(col.rows)+len(col.repetitionLevels)+len(col.definitionLevels)) + col.base.Size() 578 } 579 580 func (col *repeatedColumnBuffer) Cap() int { return cap(col.rows) } 581 582 func (col *repeatedColumnBuffer) Len() int { return len(col.rows) } 583 584 func (col *repeatedColumnBuffer) Less(i, j int) bool { 585 row1 := col.rows[i] 586 row2 := col.rows[j] 587 less := col.nullOrdering 588 row1Length := repeatedRowLength(col.repetitionLevels[row1.offset:]) 589 row2Length := repeatedRowLength(col.repetitionLevels[row2.offset:]) 590 591 for k := 0; k < row1Length && k < row2Length; k++ { 592 x := int(row1.baseOffset) 593 y := int(row2.baseOffset) 594 definitionLevel1 := col.definitionLevels[int(row1.offset)+k] 595 definitionLevel2 := col.definitionLevels[int(row2.offset)+k] 596 switch { 597 case less(col.base, x, y, col.maxDefinitionLevel, definitionLevel1, definitionLevel2): 598 return true 599 case less(col.base, y, x, col.maxDefinitionLevel, definitionLevel2, definitionLevel1): 600 return false 601 } 602 } 603 604 return row1Length < row2Length 605 } 606 607 func (col *repeatedColumnBuffer) Swap(i, j int) { 608 // Because the underlying column does not contain null values, and may hold 609 // an arbitrary number of values per row, we cannot swap its values at 610 // indexes i and j. We swap the row indexes only, then reorder the base 611 // column buffer when its view is materialized into a page by creating a 612 // copy and writing rows back to it following the order of rows in the 613 // repeated column buffer. 614 col.reordered = true 615 col.rows[i], col.rows[j] = col.rows[j], col.rows[i] 616 } 617 618 func (col *repeatedColumnBuffer) WriteValues(values []Value) (numValues int, err error) { 619 maxRowLen := 0 620 defer func() { 621 clearValues(col.buffer[:maxRowLen]) 622 }() 623 624 for i := 0; i < len(values); { 625 j := i 626 627 if values[j].repetitionLevel == 0 { 628 j++ 629 } 630 631 for j < len(values) && values[j].repetitionLevel != 0 { 632 j++ 633 } 634 635 if err := col.writeRow(values[i:j]); err != nil { 636 return numValues, err 637 } 638 639 if len(col.buffer) > maxRowLen { 640 maxRowLen = len(col.buffer) 641 } 642 643 numValues += j - i 644 i = j 645 } 646 647 return numValues, nil 648 } 649 650 func (col *repeatedColumnBuffer) writeRow(row []Value) error { 651 col.buffer = col.buffer[:0] 652 653 for _, v := range row { 654 if v.definitionLevel == col.maxDefinitionLevel { 655 col.buffer = append(col.buffer, v) 656 } 657 } 658 659 baseOffset := col.base.NumValues() 660 if len(col.buffer) > 0 { 661 if _, err := col.base.WriteValues(col.buffer); err != nil { 662 return err 663 } 664 } 665 666 if row[0].repetitionLevel == 0 { 667 col.rows = append(col.rows, offsetMapping{ 668 offset: uint32(len(col.repetitionLevels)), 669 baseOffset: uint32(baseOffset), 670 }) 671 } 672 673 for _, v := range row { 674 col.repetitionLevels = append(col.repetitionLevels, v.repetitionLevel) 675 col.definitionLevels = append(col.definitionLevels, v.definitionLevel) 676 } 677 678 return nil 679 } 680 681 func (col *repeatedColumnBuffer) writeValues(row sparse.Array, levels columnLevels) { 682 if levels.repetitionLevel == 0 { 683 col.rows = append(col.rows, offsetMapping{ 684 offset: uint32(len(col.repetitionLevels)), 685 baseOffset: uint32(col.base.NumValues()), 686 }) 687 } 688 689 if row.Len() == 0 { 690 col.repetitionLevels = append(col.repetitionLevels, levels.repetitionLevel) 691 col.definitionLevels = append(col.definitionLevels, levels.definitionLevel) 692 return 693 } 694 695 col.repetitionLevels = appendLevel(col.repetitionLevels, levels.repetitionLevel, row.Len()) 696 col.definitionLevels = appendLevel(col.definitionLevels, levels.definitionLevel, row.Len()) 697 698 if levels.definitionLevel == col.maxDefinitionLevel { 699 col.base.writeValues(row, levels) 700 } 701 } 702 703 func (col *repeatedColumnBuffer) ReadValuesAt(values []Value, offset int64) (int, error) { 704 // TODO: 705 panic("NOT IMPLEMENTED") 706 } 707 708 // repeatedRowLength gives the length of the repeated row starting at the 709 // beginning of the repetitionLevels slice. 710 func repeatedRowLength(repetitionLevels []byte) int { 711 // If a repetition level exists, at least one value is required to represent 712 // the column. 713 if len(repetitionLevels) > 0 { 714 // The subsequent levels will represent the start of a new record when 715 // they go back to zero. 716 if i := bytes.IndexByte(repetitionLevels[1:], 0); i >= 0 { 717 return i + 1 718 } 719 } 720 return len(repetitionLevels) 721 } 722 723 // ============================================================================= 724 // The types below are in-memory implementations of the ColumnBuffer interface 725 // for each parquet type. 726 // 727 // These column buffers are created by calling NewColumnBuffer on parquet.Type 728 // instances; each parquet type manages to construct column buffers of the 729 // appropriate type, which ensures that we are packing as many values as we 730 // can in memory. 731 // 732 // See Type.NewColumnBuffer for details about how these types get created. 733 // ============================================================================= 734 735 type booleanColumnBuffer struct{ booleanPage } 736 737 func newBooleanColumnBuffer(typ Type, columnIndex int16, numValues int32) *booleanColumnBuffer { 738 // Boolean values are bit-packed, we can fit up to 8 values per byte. 739 bufferSize := (numValues + 7) / 8 740 return &booleanColumnBuffer{ 741 booleanPage: booleanPage{ 742 typ: typ, 743 bits: make([]byte, 0, bufferSize), 744 columnIndex: ^columnIndex, 745 }, 746 } 747 } 748 749 func (col *booleanColumnBuffer) Clone() ColumnBuffer { 750 return &booleanColumnBuffer{ 751 booleanPage: booleanPage{ 752 typ: col.typ, 753 bits: append([]byte{}, col.bits...), 754 offset: col.offset, 755 numValues: col.numValues, 756 columnIndex: col.columnIndex, 757 }, 758 } 759 } 760 761 func (col *booleanColumnBuffer) ColumnIndex() ColumnIndex { 762 return booleanColumnIndex{&col.booleanPage} 763 } 764 765 func (col *booleanColumnBuffer) OffsetIndex() OffsetIndex { 766 return booleanOffsetIndex{&col.booleanPage} 767 } 768 769 func (col *booleanColumnBuffer) BloomFilter() BloomFilter { return nil } 770 771 func (col *booleanColumnBuffer) Dictionary() Dictionary { return nil } 772 773 func (col *booleanColumnBuffer) Pages() Pages { return onePage(col.Page()) } 774 775 func (col *booleanColumnBuffer) Page() Page { return &col.booleanPage } 776 777 func (col *booleanColumnBuffer) Reset() { 778 col.bits = col.bits[:0] 779 col.offset = 0 780 col.numValues = 0 781 } 782 783 func (col *booleanColumnBuffer) Cap() int { return 8 * cap(col.bits) } 784 785 func (col *booleanColumnBuffer) Len() int { return int(col.numValues) } 786 787 func (col *booleanColumnBuffer) Less(i, j int) bool { 788 a := col.valueAt(i) 789 b := col.valueAt(j) 790 return a != b && !a 791 } 792 793 func (col *booleanColumnBuffer) valueAt(i int) bool { 794 j := uint32(i) / 8 795 k := uint32(i) % 8 796 return ((col.bits[j] >> k) & 1) != 0 797 } 798 799 func (col *booleanColumnBuffer) setValueAt(i int, v bool) { 800 // `offset` is always zero in the page of a column buffer 801 j := uint32(i) / 8 802 k := uint32(i) % 8 803 x := byte(0) 804 if v { 805 x = 1 806 } 807 col.bits[j] = (col.bits[j] & ^(1 << k)) | (x << k) 808 } 809 810 func (col *booleanColumnBuffer) Swap(i, j int) { 811 a := col.valueAt(i) 812 b := col.valueAt(j) 813 col.setValueAt(i, b) 814 col.setValueAt(j, a) 815 } 816 817 func (col *booleanColumnBuffer) WriteBooleans(values []bool) (int, error) { 818 col.writeValues(sparse.MakeBoolArray(values).UnsafeArray(), columnLevels{}) 819 return len(values), nil 820 } 821 822 func (col *booleanColumnBuffer) WriteValues(values []Value) (int, error) { 823 var model Value 824 col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) 825 return len(values), nil 826 } 827 828 func (col *booleanColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 829 numBytes := bitpack.ByteCount(uint(col.numValues) + uint(rows.Len())) 830 if cap(col.bits) < numBytes { 831 col.bits = append(make([]byte, 0, max(numBytes, 2*cap(col.bits))), col.bits...) 832 } 833 col.bits = col.bits[:numBytes] 834 i := 0 835 r := 8 - (int(col.numValues) % 8) 836 bytes := rows.Uint8Array() 837 838 if r <= bytes.Len() { 839 // First we attempt to write enough bits to align the number of values 840 // in the column buffer on 8 bytes. After this step the next bit should 841 // be written at the zero'th index of a byte of the buffer. 842 if r < 8 { 843 var b byte 844 for i < r { 845 v := bytes.Index(i) 846 b |= (v & 1) << uint(i) 847 i++ 848 } 849 x := uint(col.numValues) / 8 850 y := uint(col.numValues) % 8 851 col.bits[x] = (b << y) | (col.bits[x] & ^(0xFF << y)) 852 col.numValues += int32(i) 853 } 854 855 if n := ((bytes.Len() - i) / 8) * 8; n > 0 { 856 // At this stage, we know that that we have at least 8 bits to write 857 // and the bits will be aligned on the address of a byte in the 858 // output buffer. We can work on 8 values per loop iteration, 859 // packing them into a single byte and writing it to the output 860 // buffer. This effectively reduces by 87.5% the number of memory 861 // stores that the program needs to perform to generate the values. 862 i += sparse.GatherBits(col.bits[col.numValues/8:], bytes.Slice(i, i+n)) 863 col.numValues += int32(n) 864 } 865 } 866 867 for i < bytes.Len() { 868 x := uint(col.numValues) / 8 869 y := uint(col.numValues) % 8 870 b := bytes.Index(i) 871 col.bits[x] = ((b & 1) << y) | (col.bits[x] & ^(1 << y)) 872 col.numValues++ 873 i++ 874 } 875 876 col.bits = col.bits[:bitpack.ByteCount(uint(col.numValues))] 877 } 878 879 func (col *booleanColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 880 i := int(offset) 881 switch { 882 case i < 0: 883 return 0, errRowIndexOutOfBounds(offset, int64(col.numValues)) 884 case i >= int(col.numValues): 885 return 0, io.EOF 886 default: 887 for n < len(values) && i < int(col.numValues) { 888 values[n] = col.makeValue(col.valueAt(i)) 889 n++ 890 i++ 891 } 892 if n < len(values) { 893 err = io.EOF 894 } 895 return n, err 896 } 897 } 898 899 type int32ColumnBuffer struct{ int32Page } 900 901 func newInt32ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int32ColumnBuffer { 902 return &int32ColumnBuffer{ 903 int32Page: int32Page{ 904 typ: typ, 905 values: make([]int32, 0, numValues), 906 columnIndex: ^columnIndex, 907 }, 908 } 909 } 910 911 func (col *int32ColumnBuffer) Clone() ColumnBuffer { 912 return &int32ColumnBuffer{ 913 int32Page: int32Page{ 914 typ: col.typ, 915 values: append([]int32{}, col.values...), 916 columnIndex: col.columnIndex, 917 }, 918 } 919 } 920 921 func (col *int32ColumnBuffer) ColumnIndex() ColumnIndex { return int32ColumnIndex{&col.int32Page} } 922 923 func (col *int32ColumnBuffer) OffsetIndex() OffsetIndex { return int32OffsetIndex{&col.int32Page} } 924 925 func (col *int32ColumnBuffer) BloomFilter() BloomFilter { return nil } 926 927 func (col *int32ColumnBuffer) Dictionary() Dictionary { return nil } 928 929 func (col *int32ColumnBuffer) Pages() Pages { return onePage(col.Page()) } 930 931 func (col *int32ColumnBuffer) Page() Page { return &col.int32Page } 932 933 func (col *int32ColumnBuffer) Reset() { col.values = col.values[:0] } 934 935 func (col *int32ColumnBuffer) Cap() int { return cap(col.values) } 936 937 func (col *int32ColumnBuffer) Len() int { return len(col.values) } 938 939 func (col *int32ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] } 940 941 func (col *int32ColumnBuffer) Swap(i, j int) { 942 col.values[i], col.values[j] = col.values[j], col.values[i] 943 } 944 945 func (col *int32ColumnBuffer) Write(b []byte) (int, error) { 946 if (len(b) % 4) != 0 { 947 return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b)) 948 } 949 col.values = append(col.values, unsafecast.BytesToInt32(b)...) 950 return len(b), nil 951 } 952 953 func (col *int32ColumnBuffer) WriteInt32s(values []int32) (int, error) { 954 col.values = append(col.values, values...) 955 return len(values), nil 956 } 957 958 func (col *int32ColumnBuffer) WriteValues(values []Value) (int, error) { 959 var model Value 960 col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) 961 return len(values), nil 962 } 963 964 func (col *int32ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 965 if n := len(col.values) + rows.Len(); n > cap(col.values) { 966 col.values = append(make([]int32, 0, max(n, 2*cap(col.values))), col.values...) 967 } 968 n := len(col.values) 969 col.values = col.values[:n+rows.Len()] 970 sparse.GatherInt32(col.values[n:], rows.Int32Array()) 971 972 } 973 974 func (col *int32ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 975 i := int(offset) 976 switch { 977 case i < 0: 978 return 0, errRowIndexOutOfBounds(offset, int64(len(col.values))) 979 case i >= len(col.values): 980 return 0, io.EOF 981 default: 982 for n < len(values) && i < len(col.values) { 983 values[n] = col.makeValue(col.values[i]) 984 n++ 985 i++ 986 } 987 if n < len(values) { 988 err = io.EOF 989 } 990 return n, err 991 } 992 } 993 994 type int64ColumnBuffer struct{ int64Page } 995 996 func newInt64ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int64ColumnBuffer { 997 return &int64ColumnBuffer{ 998 int64Page: int64Page{ 999 typ: typ, 1000 values: make([]int64, 0, numValues), 1001 columnIndex: ^columnIndex, 1002 }, 1003 } 1004 } 1005 1006 func (col *int64ColumnBuffer) Clone() ColumnBuffer { 1007 return &int64ColumnBuffer{ 1008 int64Page: int64Page{ 1009 typ: col.typ, 1010 values: append([]int64{}, col.values...), 1011 columnIndex: col.columnIndex, 1012 }, 1013 } 1014 } 1015 1016 func (col *int64ColumnBuffer) ColumnIndex() ColumnIndex { return int64ColumnIndex{&col.int64Page} } 1017 1018 func (col *int64ColumnBuffer) OffsetIndex() OffsetIndex { return int64OffsetIndex{&col.int64Page} } 1019 1020 func (col *int64ColumnBuffer) BloomFilter() BloomFilter { return nil } 1021 1022 func (col *int64ColumnBuffer) Dictionary() Dictionary { return nil } 1023 1024 func (col *int64ColumnBuffer) Pages() Pages { return onePage(col.Page()) } 1025 1026 func (col *int64ColumnBuffer) Page() Page { return &col.int64Page } 1027 1028 func (col *int64ColumnBuffer) Reset() { col.values = col.values[:0] } 1029 1030 func (col *int64ColumnBuffer) Cap() int { return cap(col.values) } 1031 1032 func (col *int64ColumnBuffer) Len() int { return len(col.values) } 1033 1034 func (col *int64ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] } 1035 1036 func (col *int64ColumnBuffer) Swap(i, j int) { 1037 col.values[i], col.values[j] = col.values[j], col.values[i] 1038 } 1039 1040 func (col *int64ColumnBuffer) Write(b []byte) (int, error) { 1041 if (len(b) % 8) != 0 { 1042 return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b)) 1043 } 1044 col.values = append(col.values, unsafecast.BytesToInt64(b)...) 1045 return len(b), nil 1046 } 1047 1048 func (col *int64ColumnBuffer) WriteInt64s(values []int64) (int, error) { 1049 col.values = append(col.values, values...) 1050 return len(values), nil 1051 } 1052 1053 func (col *int64ColumnBuffer) WriteValues(values []Value) (int, error) { 1054 var model Value 1055 col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) 1056 return len(values), nil 1057 } 1058 1059 func (col *int64ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 1060 if n := len(col.values) + rows.Len(); n > cap(col.values) { 1061 col.values = append(make([]int64, 0, max(n, 2*cap(col.values))), col.values...) 1062 } 1063 n := len(col.values) 1064 col.values = col.values[:n+rows.Len()] 1065 sparse.GatherInt64(col.values[n:], rows.Int64Array()) 1066 } 1067 1068 func (col *int64ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 1069 i := int(offset) 1070 switch { 1071 case i < 0: 1072 return 0, errRowIndexOutOfBounds(offset, int64(len(col.values))) 1073 case i >= len(col.values): 1074 return 0, io.EOF 1075 default: 1076 for n < len(values) && i < len(col.values) { 1077 values[n] = col.makeValue(col.values[i]) 1078 n++ 1079 i++ 1080 } 1081 if n < len(values) { 1082 err = io.EOF 1083 } 1084 return n, err 1085 } 1086 } 1087 1088 type int96ColumnBuffer struct{ int96Page } 1089 1090 func newInt96ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int96ColumnBuffer { 1091 return &int96ColumnBuffer{ 1092 int96Page: int96Page{ 1093 typ: typ, 1094 values: make([]deprecated.Int96, 0, numValues), 1095 columnIndex: ^columnIndex, 1096 }, 1097 } 1098 } 1099 1100 func (col *int96ColumnBuffer) Clone() ColumnBuffer { 1101 return &int96ColumnBuffer{ 1102 int96Page: int96Page{ 1103 typ: col.typ, 1104 values: append([]deprecated.Int96{}, col.values...), 1105 columnIndex: col.columnIndex, 1106 }, 1107 } 1108 } 1109 1110 func (col *int96ColumnBuffer) ColumnIndex() ColumnIndex { return int96ColumnIndex{&col.int96Page} } 1111 1112 func (col *int96ColumnBuffer) OffsetIndex() OffsetIndex { return int96OffsetIndex{&col.int96Page} } 1113 1114 func (col *int96ColumnBuffer) BloomFilter() BloomFilter { return nil } 1115 1116 func (col *int96ColumnBuffer) Dictionary() Dictionary { return nil } 1117 1118 func (col *int96ColumnBuffer) Pages() Pages { return onePage(col.Page()) } 1119 1120 func (col *int96ColumnBuffer) Page() Page { return &col.int96Page } 1121 1122 func (col *int96ColumnBuffer) Reset() { col.values = col.values[:0] } 1123 1124 func (col *int96ColumnBuffer) Cap() int { return cap(col.values) } 1125 1126 func (col *int96ColumnBuffer) Len() int { return len(col.values) } 1127 1128 func (col *int96ColumnBuffer) Less(i, j int) bool { return col.values[i].Less(col.values[j]) } 1129 1130 func (col *int96ColumnBuffer) Swap(i, j int) { 1131 col.values[i], col.values[j] = col.values[j], col.values[i] 1132 } 1133 1134 func (col *int96ColumnBuffer) Write(b []byte) (int, error) { 1135 if (len(b) % 12) != 0 { 1136 return 0, fmt.Errorf("cannot write INT96 values from input of size %d", len(b)) 1137 } 1138 col.values = append(col.values, deprecated.BytesToInt96(b)...) 1139 return len(b), nil 1140 } 1141 1142 func (col *int96ColumnBuffer) WriteInt96s(values []deprecated.Int96) (int, error) { 1143 col.values = append(col.values, values...) 1144 return len(values), nil 1145 } 1146 1147 func (col *int96ColumnBuffer) WriteValues(values []Value) (int, error) { 1148 for _, v := range values { 1149 col.values = append(col.values, v.Int96()) 1150 } 1151 return len(values), nil 1152 } 1153 1154 func (col *int96ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 1155 for i := 0; i < rows.Len(); i++ { 1156 p := rows.Index(i) 1157 col.values = append(col.values, *(*deprecated.Int96)(p)) 1158 } 1159 } 1160 1161 func (col *int96ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 1162 i := int(offset) 1163 switch { 1164 case i < 0: 1165 return 0, errRowIndexOutOfBounds(offset, int64(len(col.values))) 1166 case i >= len(col.values): 1167 return 0, io.EOF 1168 default: 1169 for n < len(values) && i < len(col.values) { 1170 values[n] = col.makeValue(col.values[i]) 1171 n++ 1172 i++ 1173 } 1174 if n < len(values) { 1175 err = io.EOF 1176 } 1177 return n, err 1178 } 1179 } 1180 1181 type floatColumnBuffer struct{ floatPage } 1182 1183 func newFloatColumnBuffer(typ Type, columnIndex int16, numValues int32) *floatColumnBuffer { 1184 return &floatColumnBuffer{ 1185 floatPage: floatPage{ 1186 typ: typ, 1187 values: make([]float32, 0, numValues), 1188 columnIndex: ^columnIndex, 1189 }, 1190 } 1191 } 1192 1193 func (col *floatColumnBuffer) Clone() ColumnBuffer { 1194 return &floatColumnBuffer{ 1195 floatPage: floatPage{ 1196 typ: col.typ, 1197 values: append([]float32{}, col.values...), 1198 columnIndex: col.columnIndex, 1199 }, 1200 } 1201 } 1202 1203 func (col *floatColumnBuffer) ColumnIndex() ColumnIndex { return floatColumnIndex{&col.floatPage} } 1204 1205 func (col *floatColumnBuffer) OffsetIndex() OffsetIndex { return floatOffsetIndex{&col.floatPage} } 1206 1207 func (col *floatColumnBuffer) BloomFilter() BloomFilter { return nil } 1208 1209 func (col *floatColumnBuffer) Dictionary() Dictionary { return nil } 1210 1211 func (col *floatColumnBuffer) Pages() Pages { return onePage(col.Page()) } 1212 1213 func (col *floatColumnBuffer) Page() Page { return &col.floatPage } 1214 1215 func (col *floatColumnBuffer) Reset() { col.values = col.values[:0] } 1216 1217 func (col *floatColumnBuffer) Cap() int { return cap(col.values) } 1218 1219 func (col *floatColumnBuffer) Len() int { return len(col.values) } 1220 1221 func (col *floatColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] } 1222 1223 func (col *floatColumnBuffer) Swap(i, j int) { 1224 col.values[i], col.values[j] = col.values[j], col.values[i] 1225 } 1226 1227 func (col *floatColumnBuffer) Write(b []byte) (int, error) { 1228 if (len(b) % 4) != 0 { 1229 return 0, fmt.Errorf("cannot write FLOAT values from input of size %d", len(b)) 1230 } 1231 col.values = append(col.values, unsafecast.BytesToFloat32(b)...) 1232 return len(b), nil 1233 } 1234 1235 func (col *floatColumnBuffer) WriteFloats(values []float32) (int, error) { 1236 col.values = append(col.values, values...) 1237 return len(values), nil 1238 } 1239 1240 func (col *floatColumnBuffer) WriteValues(values []Value) (int, error) { 1241 var model Value 1242 col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) 1243 return len(values), nil 1244 } 1245 1246 func (col *floatColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 1247 if n := len(col.values) + rows.Len(); n > cap(col.values) { 1248 col.values = append(make([]float32, 0, max(n, 2*cap(col.values))), col.values...) 1249 } 1250 n := len(col.values) 1251 col.values = col.values[:n+rows.Len()] 1252 sparse.GatherFloat32(col.values[n:], rows.Float32Array()) 1253 } 1254 1255 func (col *floatColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 1256 i := int(offset) 1257 switch { 1258 case i < 0: 1259 return 0, errRowIndexOutOfBounds(offset, int64(len(col.values))) 1260 case i >= len(col.values): 1261 return 0, io.EOF 1262 default: 1263 for n < len(values) && i < len(col.values) { 1264 values[n] = col.makeValue(col.values[i]) 1265 n++ 1266 i++ 1267 } 1268 if n < len(values) { 1269 err = io.EOF 1270 } 1271 return n, err 1272 } 1273 } 1274 1275 type doubleColumnBuffer struct{ doublePage } 1276 1277 func newDoubleColumnBuffer(typ Type, columnIndex int16, numValues int32) *doubleColumnBuffer { 1278 return &doubleColumnBuffer{ 1279 doublePage: doublePage{ 1280 typ: typ, 1281 values: make([]float64, 0, numValues), 1282 columnIndex: ^columnIndex, 1283 }, 1284 } 1285 } 1286 1287 func (col *doubleColumnBuffer) Clone() ColumnBuffer { 1288 return &doubleColumnBuffer{ 1289 doublePage: doublePage{ 1290 typ: col.typ, 1291 values: append([]float64{}, col.values...), 1292 columnIndex: col.columnIndex, 1293 }, 1294 } 1295 } 1296 1297 func (col *doubleColumnBuffer) ColumnIndex() ColumnIndex { return doubleColumnIndex{&col.doublePage} } 1298 1299 func (col *doubleColumnBuffer) OffsetIndex() OffsetIndex { return doubleOffsetIndex{&col.doublePage} } 1300 1301 func (col *doubleColumnBuffer) BloomFilter() BloomFilter { return nil } 1302 1303 func (col *doubleColumnBuffer) Dictionary() Dictionary { return nil } 1304 1305 func (col *doubleColumnBuffer) Pages() Pages { return onePage(col.Page()) } 1306 1307 func (col *doubleColumnBuffer) Page() Page { return &col.doublePage } 1308 1309 func (col *doubleColumnBuffer) Reset() { col.values = col.values[:0] } 1310 1311 func (col *doubleColumnBuffer) Cap() int { return cap(col.values) } 1312 1313 func (col *doubleColumnBuffer) Len() int { return len(col.values) } 1314 1315 func (col *doubleColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] } 1316 1317 func (col *doubleColumnBuffer) Swap(i, j int) { 1318 col.values[i], col.values[j] = col.values[j], col.values[i] 1319 } 1320 1321 func (col *doubleColumnBuffer) Write(b []byte) (int, error) { 1322 if (len(b) % 8) != 0 { 1323 return 0, fmt.Errorf("cannot write DOUBLE values from input of size %d", len(b)) 1324 } 1325 col.values = append(col.values, unsafecast.BytesToFloat64(b)...) 1326 return len(b), nil 1327 } 1328 1329 func (col *doubleColumnBuffer) WriteDoubles(values []float64) (int, error) { 1330 col.values = append(col.values, values...) 1331 return len(values), nil 1332 } 1333 1334 func (col *doubleColumnBuffer) WriteValues(values []Value) (int, error) { 1335 var model Value 1336 col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) 1337 return len(values), nil 1338 } 1339 1340 func (col *doubleColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 1341 if n := len(col.values) + rows.Len(); n > cap(col.values) { 1342 col.values = append(make([]float64, 0, max(n, 2*cap(col.values))), col.values...) 1343 } 1344 n := len(col.values) 1345 col.values = col.values[:n+rows.Len()] 1346 sparse.GatherFloat64(col.values[n:], rows.Float64Array()) 1347 } 1348 1349 func (col *doubleColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 1350 i := int(offset) 1351 switch { 1352 case i < 0: 1353 return 0, errRowIndexOutOfBounds(offset, int64(len(col.values))) 1354 case i >= len(col.values): 1355 return 0, io.EOF 1356 default: 1357 for n < len(values) && i < len(col.values) { 1358 values[n] = col.makeValue(col.values[i]) 1359 n++ 1360 i++ 1361 } 1362 if n < len(values) { 1363 err = io.EOF 1364 } 1365 return n, err 1366 } 1367 } 1368 1369 type byteArrayColumnBuffer struct { 1370 byteArrayPage 1371 lengths []uint32 1372 scratch []byte 1373 } 1374 1375 func newByteArrayColumnBuffer(typ Type, columnIndex int16, numValues int32) *byteArrayColumnBuffer { 1376 return &byteArrayColumnBuffer{ 1377 byteArrayPage: byteArrayPage{ 1378 typ: typ, 1379 values: make([]byte, 0, typ.EstimateSize(int(numValues))), 1380 offsets: make([]uint32, 0, numValues+1), 1381 columnIndex: ^columnIndex, 1382 }, 1383 lengths: make([]uint32, 0, numValues), 1384 } 1385 } 1386 1387 func (col *byteArrayColumnBuffer) Clone() ColumnBuffer { 1388 return &byteArrayColumnBuffer{ 1389 byteArrayPage: byteArrayPage{ 1390 typ: col.typ, 1391 values: col.cloneValues(), 1392 offsets: col.cloneOffsets(), 1393 columnIndex: col.columnIndex, 1394 }, 1395 lengths: col.cloneLengths(), 1396 } 1397 } 1398 1399 func (col *byteArrayColumnBuffer) cloneLengths() []uint32 { 1400 lengths := make([]uint32, len(col.lengths)) 1401 copy(lengths, col.lengths) 1402 return lengths 1403 } 1404 1405 func (col *byteArrayColumnBuffer) ColumnIndex() ColumnIndex { 1406 return byteArrayColumnIndex{&col.byteArrayPage} 1407 } 1408 1409 func (col *byteArrayColumnBuffer) OffsetIndex() OffsetIndex { 1410 return byteArrayOffsetIndex{&col.byteArrayPage} 1411 } 1412 1413 func (col *byteArrayColumnBuffer) BloomFilter() BloomFilter { return nil } 1414 1415 func (col *byteArrayColumnBuffer) Dictionary() Dictionary { return nil } 1416 1417 func (col *byteArrayColumnBuffer) Pages() Pages { return onePage(col.Page()) } 1418 1419 func (col *byteArrayColumnBuffer) Page() Page { 1420 if len(col.lengths) > 0 && orderOfUint32(col.offsets) < 1 { // unordered? 1421 if cap(col.scratch) < len(col.values) { 1422 col.scratch = make([]byte, 0, cap(col.values)) 1423 } else { 1424 col.scratch = col.scratch[:0] 1425 } 1426 1427 for i := range col.lengths { 1428 n := len(col.scratch) 1429 col.scratch = append(col.scratch, col.index(i)...) 1430 col.offsets[i] = uint32(n) 1431 } 1432 1433 col.values, col.scratch = col.scratch, col.values 1434 } 1435 // The offsets have the total length as the last item. Since we are about to 1436 // expose the column buffer's internal state as a Page value we ensure that 1437 // the last offset is the total length of all values. 1438 col.offsets = append(col.offsets[:len(col.lengths)], uint32(len(col.values))) 1439 return &col.byteArrayPage 1440 } 1441 1442 func (col *byteArrayColumnBuffer) Reset() { 1443 col.values = col.values[:0] 1444 col.offsets = col.offsets[:0] 1445 col.lengths = col.lengths[:0] 1446 } 1447 1448 func (col *byteArrayColumnBuffer) NumRows() int64 { return int64(col.Len()) } 1449 1450 func (col *byteArrayColumnBuffer) NumValues() int64 { return int64(col.Len()) } 1451 1452 func (col *byteArrayColumnBuffer) Cap() int { return cap(col.lengths) } 1453 1454 func (col *byteArrayColumnBuffer) Len() int { return len(col.lengths) } 1455 1456 func (col *byteArrayColumnBuffer) Less(i, j int) bool { 1457 return bytes.Compare(col.index(i), col.index(j)) < 0 1458 } 1459 1460 func (col *byteArrayColumnBuffer) Swap(i, j int) { 1461 col.offsets[i], col.offsets[j] = col.offsets[j], col.offsets[i] 1462 col.lengths[i], col.lengths[j] = col.lengths[j], col.lengths[i] 1463 } 1464 1465 func (col *byteArrayColumnBuffer) Write(b []byte) (int, error) { 1466 _, n, err := col.writeByteArrays(b) 1467 return n, err 1468 } 1469 1470 func (col *byteArrayColumnBuffer) WriteByteArrays(values []byte) (int, error) { 1471 n, _, err := col.writeByteArrays(values) 1472 return n, err 1473 } 1474 1475 func (col *byteArrayColumnBuffer) writeByteArrays(values []byte) (count, bytes int, err error) { 1476 baseCount := len(col.lengths) 1477 baseBytes := len(col.values) + (plain.ByteArrayLengthSize * len(col.lengths)) 1478 1479 err = plain.RangeByteArray(values, func(value []byte) error { 1480 col.append(unsafecast.BytesToString(value)) 1481 return nil 1482 }) 1483 1484 count = len(col.lengths) - baseCount 1485 bytes = (len(col.values) - baseBytes) + (plain.ByteArrayLengthSize * count) 1486 return count, bytes, err 1487 } 1488 1489 func (col *byteArrayColumnBuffer) WriteValues(values []Value) (int, error) { 1490 var model Value 1491 col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.ptr)), columnLevels{}) 1492 return len(values), nil 1493 } 1494 1495 func (col *byteArrayColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 1496 for i := 0; i < rows.Len(); i++ { 1497 p := rows.Index(i) 1498 col.append(*(*string)(p)) 1499 } 1500 } 1501 1502 func (col *byteArrayColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 1503 i := int(offset) 1504 switch { 1505 case i < 0: 1506 return 0, errRowIndexOutOfBounds(offset, int64(len(col.lengths))) 1507 case i >= len(col.lengths): 1508 return 0, io.EOF 1509 default: 1510 for n < len(values) && i < len(col.lengths) { 1511 values[n] = col.makeValueBytes(col.index(i)) 1512 n++ 1513 i++ 1514 } 1515 if n < len(values) { 1516 err = io.EOF 1517 } 1518 return n, err 1519 } 1520 } 1521 1522 func (col *byteArrayColumnBuffer) append(value string) { 1523 col.offsets = append(col.offsets, uint32(len(col.values))) 1524 col.lengths = append(col.lengths, uint32(len(value))) 1525 col.values = append(col.values, value...) 1526 } 1527 1528 func (col *byteArrayColumnBuffer) index(i int) []byte { 1529 offset := col.offsets[i] 1530 length := col.lengths[i] 1531 end := offset + length 1532 return col.values[offset:end:end] 1533 } 1534 1535 type fixedLenByteArrayColumnBuffer struct { 1536 fixedLenByteArrayPage 1537 tmp []byte 1538 } 1539 1540 func newFixedLenByteArrayColumnBuffer(typ Type, columnIndex int16, numValues int32) *fixedLenByteArrayColumnBuffer { 1541 size := typ.Length() 1542 return &fixedLenByteArrayColumnBuffer{ 1543 fixedLenByteArrayPage: fixedLenByteArrayPage{ 1544 typ: typ, 1545 size: size, 1546 data: make([]byte, 0, typ.EstimateSize(int(numValues))), 1547 columnIndex: ^columnIndex, 1548 }, 1549 tmp: make([]byte, size), 1550 } 1551 } 1552 1553 func (col *fixedLenByteArrayColumnBuffer) Clone() ColumnBuffer { 1554 return &fixedLenByteArrayColumnBuffer{ 1555 fixedLenByteArrayPage: fixedLenByteArrayPage{ 1556 typ: col.typ, 1557 size: col.size, 1558 data: append([]byte{}, col.data...), 1559 columnIndex: col.columnIndex, 1560 }, 1561 tmp: make([]byte, col.size), 1562 } 1563 } 1564 1565 func (col *fixedLenByteArrayColumnBuffer) ColumnIndex() ColumnIndex { 1566 return fixedLenByteArrayColumnIndex{&col.fixedLenByteArrayPage} 1567 } 1568 1569 func (col *fixedLenByteArrayColumnBuffer) OffsetIndex() OffsetIndex { 1570 return fixedLenByteArrayOffsetIndex{&col.fixedLenByteArrayPage} 1571 } 1572 1573 func (col *fixedLenByteArrayColumnBuffer) BloomFilter() BloomFilter { return nil } 1574 1575 func (col *fixedLenByteArrayColumnBuffer) Dictionary() Dictionary { return nil } 1576 1577 func (col *fixedLenByteArrayColumnBuffer) Pages() Pages { return onePage(col.Page()) } 1578 1579 func (col *fixedLenByteArrayColumnBuffer) Page() Page { return &col.fixedLenByteArrayPage } 1580 1581 func (col *fixedLenByteArrayColumnBuffer) Reset() { col.data = col.data[:0] } 1582 1583 func (col *fixedLenByteArrayColumnBuffer) Cap() int { return cap(col.data) / col.size } 1584 1585 func (col *fixedLenByteArrayColumnBuffer) Len() int { return len(col.data) / col.size } 1586 1587 func (col *fixedLenByteArrayColumnBuffer) Less(i, j int) bool { 1588 return bytes.Compare(col.index(i), col.index(j)) < 0 1589 } 1590 1591 func (col *fixedLenByteArrayColumnBuffer) Swap(i, j int) { 1592 t, u, v := col.tmp[:col.size], col.index(i), col.index(j) 1593 copy(t, u) 1594 copy(u, v) 1595 copy(v, t) 1596 } 1597 1598 func (col *fixedLenByteArrayColumnBuffer) index(i int) []byte { 1599 j := (i + 0) * col.size 1600 k := (i + 1) * col.size 1601 return col.data[j:k:k] 1602 } 1603 1604 func (col *fixedLenByteArrayColumnBuffer) Write(b []byte) (int, error) { 1605 n, err := col.WriteFixedLenByteArrays(b) 1606 return n * col.size, err 1607 } 1608 1609 func (col *fixedLenByteArrayColumnBuffer) WriteFixedLenByteArrays(values []byte) (int, error) { 1610 d, m := len(values)/col.size, len(values)%col.size 1611 if m != 0 { 1612 return 0, fmt.Errorf("cannot write FIXED_LEN_BYTE_ARRAY values of size %d from input of size %d", col.size, len(values)) 1613 } 1614 col.data = append(col.data, values...) 1615 return d, nil 1616 } 1617 1618 func (col *fixedLenByteArrayColumnBuffer) WriteValues(values []Value) (int, error) { 1619 for _, v := range values { 1620 col.data = append(col.data, v.byteArray()...) 1621 } 1622 return len(values), nil 1623 } 1624 1625 func (col *fixedLenByteArrayColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 1626 n := col.size * rows.Len() 1627 i := len(col.data) 1628 j := len(col.data) + n 1629 1630 if cap(col.data) < j { 1631 col.data = append(make([]byte, 0, max(i+n, 2*cap(col.data))), col.data...) 1632 } 1633 1634 col.data = col.data[:j] 1635 newData := col.data[i:] 1636 1637 for i := 0; i < rows.Len(); i++ { 1638 p := rows.Index(i) 1639 copy(newData[i*col.size:], unsafe.Slice((*byte)(p), col.size)) 1640 } 1641 } 1642 1643 func (col *fixedLenByteArrayColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 1644 i := int(offset) * col.size 1645 switch { 1646 case i < 0: 1647 return 0, errRowIndexOutOfBounds(offset, int64(len(col.data)/col.size)) 1648 case i >= len(col.data): 1649 return 0, io.EOF 1650 default: 1651 for n < len(values) && i < len(col.data) { 1652 values[n] = col.makeValueBytes(col.data[i : i+col.size]) 1653 n++ 1654 i += col.size 1655 } 1656 if n < len(values) { 1657 err = io.EOF 1658 } 1659 return n, err 1660 } 1661 } 1662 1663 type uint32ColumnBuffer struct{ uint32Page } 1664 1665 func newUint32ColumnBuffer(typ Type, columnIndex int16, numValues int32) *uint32ColumnBuffer { 1666 return &uint32ColumnBuffer{ 1667 uint32Page: uint32Page{ 1668 typ: typ, 1669 values: make([]uint32, 0, numValues), 1670 columnIndex: ^columnIndex, 1671 }, 1672 } 1673 } 1674 1675 func (col *uint32ColumnBuffer) Clone() ColumnBuffer { 1676 return &uint32ColumnBuffer{ 1677 uint32Page: uint32Page{ 1678 typ: col.typ, 1679 values: append([]uint32{}, col.values...), 1680 columnIndex: col.columnIndex, 1681 }, 1682 } 1683 } 1684 1685 func (col *uint32ColumnBuffer) ColumnIndex() ColumnIndex { return uint32ColumnIndex{&col.uint32Page} } 1686 1687 func (col *uint32ColumnBuffer) OffsetIndex() OffsetIndex { return uint32OffsetIndex{&col.uint32Page} } 1688 1689 func (col *uint32ColumnBuffer) BloomFilter() BloomFilter { return nil } 1690 1691 func (col *uint32ColumnBuffer) Dictionary() Dictionary { return nil } 1692 1693 func (col *uint32ColumnBuffer) Pages() Pages { return onePage(col.Page()) } 1694 1695 func (col *uint32ColumnBuffer) Page() Page { return &col.uint32Page } 1696 1697 func (col *uint32ColumnBuffer) Reset() { col.values = col.values[:0] } 1698 1699 func (col *uint32ColumnBuffer) Cap() int { return cap(col.values) } 1700 1701 func (col *uint32ColumnBuffer) Len() int { return len(col.values) } 1702 1703 func (col *uint32ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] } 1704 1705 func (col *uint32ColumnBuffer) Swap(i, j int) { 1706 col.values[i], col.values[j] = col.values[j], col.values[i] 1707 } 1708 1709 func (col *uint32ColumnBuffer) Write(b []byte) (int, error) { 1710 if (len(b) % 4) != 0 { 1711 return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b)) 1712 } 1713 col.values = append(col.values, unsafecast.BytesToUint32(b)...) 1714 return len(b), nil 1715 } 1716 1717 func (col *uint32ColumnBuffer) WriteUint32s(values []uint32) (int, error) { 1718 col.values = append(col.values, values...) 1719 return len(values), nil 1720 } 1721 1722 func (col *uint32ColumnBuffer) WriteValues(values []Value) (int, error) { 1723 var model Value 1724 col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) 1725 return len(values), nil 1726 } 1727 1728 func (col *uint32ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 1729 if n := len(col.values) + rows.Len(); n > cap(col.values) { 1730 col.values = append(make([]uint32, 0, max(n, 2*cap(col.values))), col.values...) 1731 } 1732 n := len(col.values) 1733 col.values = col.values[:n+rows.Len()] 1734 sparse.GatherUint32(col.values[n:], rows.Uint32Array()) 1735 } 1736 1737 func (col *uint32ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 1738 i := int(offset) 1739 switch { 1740 case i < 0: 1741 return 0, errRowIndexOutOfBounds(offset, int64(len(col.values))) 1742 case i >= len(col.values): 1743 return 0, io.EOF 1744 default: 1745 for n < len(values) && i < len(col.values) { 1746 values[n] = col.makeValue(col.values[i]) 1747 n++ 1748 i++ 1749 } 1750 if n < len(values) { 1751 err = io.EOF 1752 } 1753 return n, err 1754 } 1755 } 1756 1757 type uint64ColumnBuffer struct{ uint64Page } 1758 1759 func newUint64ColumnBuffer(typ Type, columnIndex int16, numValues int32) *uint64ColumnBuffer { 1760 return &uint64ColumnBuffer{ 1761 uint64Page: uint64Page{ 1762 typ: typ, 1763 values: make([]uint64, 0, numValues), 1764 columnIndex: ^columnIndex, 1765 }, 1766 } 1767 } 1768 1769 func (col *uint64ColumnBuffer) Clone() ColumnBuffer { 1770 return &uint64ColumnBuffer{ 1771 uint64Page: uint64Page{ 1772 typ: col.typ, 1773 values: append([]uint64{}, col.values...), 1774 columnIndex: col.columnIndex, 1775 }, 1776 } 1777 } 1778 1779 func (col *uint64ColumnBuffer) ColumnIndex() ColumnIndex { return uint64ColumnIndex{&col.uint64Page} } 1780 1781 func (col *uint64ColumnBuffer) OffsetIndex() OffsetIndex { return uint64OffsetIndex{&col.uint64Page} } 1782 1783 func (col *uint64ColumnBuffer) BloomFilter() BloomFilter { return nil } 1784 1785 func (col *uint64ColumnBuffer) Dictionary() Dictionary { return nil } 1786 1787 func (col *uint64ColumnBuffer) Pages() Pages { return onePage(col.Page()) } 1788 1789 func (col *uint64ColumnBuffer) Page() Page { return &col.uint64Page } 1790 1791 func (col *uint64ColumnBuffer) Reset() { col.values = col.values[:0] } 1792 1793 func (col *uint64ColumnBuffer) Cap() int { return cap(col.values) } 1794 1795 func (col *uint64ColumnBuffer) Len() int { return len(col.values) } 1796 1797 func (col *uint64ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] } 1798 1799 func (col *uint64ColumnBuffer) Swap(i, j int) { 1800 col.values[i], col.values[j] = col.values[j], col.values[i] 1801 } 1802 1803 func (col *uint64ColumnBuffer) Write(b []byte) (int, error) { 1804 if (len(b) % 8) != 0 { 1805 return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b)) 1806 } 1807 col.values = append(col.values, unsafecast.BytesToUint64(b)...) 1808 return len(b), nil 1809 } 1810 1811 func (col *uint64ColumnBuffer) WriteUint64s(values []uint64) (int, error) { 1812 col.values = append(col.values, values...) 1813 return len(values), nil 1814 } 1815 1816 func (col *uint64ColumnBuffer) WriteValues(values []Value) (int, error) { 1817 var model Value 1818 col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{}) 1819 return len(values), nil 1820 } 1821 1822 func (col *uint64ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 1823 if n := len(col.values) + rows.Len(); n > cap(col.values) { 1824 col.values = append(make([]uint64, 0, max(n, 2*cap(col.values))), col.values...) 1825 } 1826 n := len(col.values) 1827 col.values = col.values[:n+rows.Len()] 1828 sparse.GatherUint64(col.values[n:], rows.Uint64Array()) 1829 } 1830 1831 func (col *uint64ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 1832 i := int(offset) 1833 switch { 1834 case i < 0: 1835 return 0, errRowIndexOutOfBounds(offset, int64(len(col.values))) 1836 case i >= len(col.values): 1837 return 0, io.EOF 1838 default: 1839 for n < len(values) && i < len(col.values) { 1840 values[n] = col.makeValue(col.values[i]) 1841 n++ 1842 i++ 1843 } 1844 if n < len(values) { 1845 err = io.EOF 1846 } 1847 return n, err 1848 } 1849 } 1850 1851 type be128ColumnBuffer struct{ be128Page } 1852 1853 func newBE128ColumnBuffer(typ Type, columnIndex int16, numValues int32) *be128ColumnBuffer { 1854 return &be128ColumnBuffer{ 1855 be128Page: be128Page{ 1856 typ: typ, 1857 values: make([][16]byte, 0, numValues), 1858 columnIndex: ^columnIndex, 1859 }, 1860 } 1861 } 1862 1863 func (col *be128ColumnBuffer) Clone() ColumnBuffer { 1864 return &be128ColumnBuffer{ 1865 be128Page: be128Page{ 1866 typ: col.typ, 1867 values: append([][16]byte{}, col.values...), 1868 columnIndex: col.columnIndex, 1869 }, 1870 } 1871 } 1872 1873 func (col *be128ColumnBuffer) ColumnIndex() ColumnIndex { 1874 return be128ColumnIndex{&col.be128Page} 1875 } 1876 1877 func (col *be128ColumnBuffer) OffsetIndex() OffsetIndex { 1878 return be128OffsetIndex{&col.be128Page} 1879 } 1880 1881 func (col *be128ColumnBuffer) BloomFilter() BloomFilter { return nil } 1882 1883 func (col *be128ColumnBuffer) Dictionary() Dictionary { return nil } 1884 1885 func (col *be128ColumnBuffer) Pages() Pages { return onePage(col.Page()) } 1886 1887 func (col *be128ColumnBuffer) Page() Page { return &col.be128Page } 1888 1889 func (col *be128ColumnBuffer) Reset() { col.values = col.values[:0] } 1890 1891 func (col *be128ColumnBuffer) Cap() int { return cap(col.values) } 1892 1893 func (col *be128ColumnBuffer) Len() int { return len(col.values) } 1894 1895 func (col *be128ColumnBuffer) Less(i, j int) bool { 1896 return lessBE128(&col.values[i], &col.values[j]) 1897 } 1898 1899 func (col *be128ColumnBuffer) Swap(i, j int) { 1900 col.values[i], col.values[j] = col.values[j], col.values[i] 1901 } 1902 1903 func (col *be128ColumnBuffer) WriteValues(values []Value) (int, error) { 1904 if n := len(col.values) + len(values); n > cap(col.values) { 1905 col.values = append(make([][16]byte, 0, max(n, 2*cap(col.values))), col.values...) 1906 } 1907 n := len(col.values) 1908 col.values = col.values[:n+len(values)] 1909 newValues := col.values[n:] 1910 for i, v := range values { 1911 copy(newValues[i][:], v.byteArray()) 1912 } 1913 return len(values), nil 1914 } 1915 1916 func (col *be128ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 1917 if n := len(col.values) + rows.Len(); n > cap(col.values) { 1918 col.values = append(make([][16]byte, 0, max(n, 2*cap(col.values))), col.values...) 1919 } 1920 n := len(col.values) 1921 col.values = col.values[:n+rows.Len()] 1922 sparse.GatherUint128(col.values[n:], rows.Uint128Array()) 1923 } 1924 1925 func (col *be128ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 1926 i := int(offset) 1927 switch { 1928 case i < 0: 1929 return 0, errRowIndexOutOfBounds(offset, int64(len(col.values))) 1930 case i >= len(col.values): 1931 return 0, io.EOF 1932 default: 1933 for n < len(values) && i < len(col.values) { 1934 values[n] = col.makeValue(&col.values[i]) 1935 n++ 1936 i++ 1937 } 1938 if n < len(values) { 1939 err = io.EOF 1940 } 1941 return n, err 1942 } 1943 } 1944 1945 var ( 1946 _ sort.Interface = (ColumnBuffer)(nil) 1947 _ io.Writer = (*byteArrayColumnBuffer)(nil) 1948 _ io.Writer = (*fixedLenByteArrayColumnBuffer)(nil) 1949 )