github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/column_index.go (about) 1 package parquet 2 3 import ( 4 "github.com/vc42/parquet-go/deprecated" 5 "github.com/vc42/parquet-go/encoding/plain" 6 "github.com/vc42/parquet-go/format" 7 "github.com/vc42/parquet-go/internal/unsafecast" 8 ) 9 10 type ColumnIndex interface { 11 // NumPages returns the number of paged in the column index. 12 NumPages() int 13 14 // Returns the number of null values in the page at the given index. 15 NullCount(int) int64 16 17 // Tells whether the page at the given index contains null values only. 18 NullPage(int) bool 19 20 // PageIndex return min/max bounds for the page at the given index in the 21 // column. 22 MinValue(int) Value 23 MaxValue(int) Value 24 25 // IsAscending returns true if the column index min/max values are sorted 26 // in ascending order (based on the ordering rules of the column's logical 27 // type). 28 IsAscending() bool 29 30 // IsDescending returns true if the column index min/max values are sorted 31 // in descending order (based on the ordering rules of the column's logical 32 // type). 33 IsDescending() bool 34 } 35 36 // NewColumnIndex constructs a ColumnIndex instance from the given parquet 37 // format column index. The kind argument configures the type of values 38 func NewColumnIndex(kind Kind, index *format.ColumnIndex) ColumnIndex { 39 return &formatColumnIndex{ 40 kind: kind, 41 index: index, 42 } 43 } 44 45 type formatColumnIndex struct { 46 kind Kind 47 index *format.ColumnIndex 48 } 49 50 func (f *formatColumnIndex) NumPages() int { 51 return len(f.index.MinValues) 52 } 53 54 func (f *formatColumnIndex) NullCount(i int) int64 { 55 if len(f.index.NullCounts) > 0 { 56 return f.index.NullCounts[i] 57 } 58 return 0 59 } 60 61 func (f *formatColumnIndex) NullPage(i int) bool { 62 return len(f.index.NullPages) > 0 && f.index.NullPages[i] 63 } 64 65 func (f *formatColumnIndex) MinValue(i int) Value { 66 if f.NullPage(i) { 67 return Value{} 68 } 69 return f.kind.Value(f.index.MinValues[i]) 70 } 71 72 func (f *formatColumnIndex) MaxValue(i int) Value { 73 if f.NullPage(i) { 74 return Value{} 75 } 76 return f.kind.Value(f.index.MaxValues[i]) 77 } 78 79 func (f *formatColumnIndex) IsAscending() bool { 80 return f.index.BoundaryOrder == format.Ascending 81 } 82 83 func (f *formatColumnIndex) IsDescending() bool { 84 return f.index.BoundaryOrder == format.Descending 85 } 86 87 type fileColumnIndex struct{ chunk *fileColumnChunk } 88 89 func (i fileColumnIndex) NumPages() int { 90 return len(i.chunk.columnIndex.NullPages) 91 } 92 93 func (i fileColumnIndex) NullCount(j int) int64 { 94 if len(i.chunk.columnIndex.NullCounts) > 0 { 95 return i.chunk.columnIndex.NullCounts[j] 96 } 97 return 0 98 } 99 100 func (i fileColumnIndex) NullPage(j int) bool { 101 return len(i.chunk.columnIndex.NullPages) > 0 && i.chunk.columnIndex.NullPages[j] 102 } 103 104 func (i fileColumnIndex) MinValue(j int) Value { 105 if i.NullPage(j) { 106 return Value{} 107 } 108 return i.makeValue(i.chunk.columnIndex.MinValues[j]) 109 } 110 111 func (i fileColumnIndex) MaxValue(j int) Value { 112 if i.NullPage(j) { 113 return Value{} 114 } 115 return i.makeValue(i.chunk.columnIndex.MaxValues[j]) 116 } 117 118 func (i fileColumnIndex) IsAscending() bool { 119 return i.chunk.columnIndex.BoundaryOrder == format.Ascending 120 } 121 122 func (i fileColumnIndex) IsDescending() bool { 123 return i.chunk.columnIndex.BoundaryOrder == format.Descending 124 } 125 126 func (i *fileColumnIndex) makeValue(b []byte) Value { 127 return i.chunk.column.typ.Kind().Value(b) 128 } 129 130 type emptyColumnIndex struct{} 131 132 func (emptyColumnIndex) NumPages() int { return 0 } 133 func (emptyColumnIndex) NullCount(int) int64 { return 0 } 134 func (emptyColumnIndex) NullPage(int) bool { return false } 135 func (emptyColumnIndex) MinValue(int) Value { return Value{} } 136 func (emptyColumnIndex) MaxValue(int) Value { return Value{} } 137 func (emptyColumnIndex) IsAscending() bool { return false } 138 func (emptyColumnIndex) IsDescending() bool { return false } 139 140 type booleanColumnIndex struct{ page *booleanPage } 141 142 func (i booleanColumnIndex) NumPages() int { return 1 } 143 func (i booleanColumnIndex) NullCount(int) int64 { return 0 } 144 func (i booleanColumnIndex) NullPage(int) bool { return false } 145 func (i booleanColumnIndex) MinValue(int) Value { return makeValueBoolean(i.page.min()) } 146 func (i booleanColumnIndex) MaxValue(int) Value { return makeValueBoolean(i.page.max()) } 147 func (i booleanColumnIndex) IsAscending() bool { return false } 148 func (i booleanColumnIndex) IsDescending() bool { return false } 149 150 type int32ColumnIndex struct{ page *int32Page } 151 152 func (i int32ColumnIndex) NumPages() int { return 1 } 153 func (i int32ColumnIndex) NullCount(int) int64 { return 0 } 154 func (i int32ColumnIndex) NullPage(int) bool { return false } 155 func (i int32ColumnIndex) MinValue(int) Value { return makeValueInt32(i.page.min()) } 156 func (i int32ColumnIndex) MaxValue(int) Value { return makeValueInt32(i.page.max()) } 157 func (i int32ColumnIndex) IsAscending() bool { return false } 158 func (i int32ColumnIndex) IsDescending() bool { return false } 159 160 type int64ColumnIndex struct{ page *int64Page } 161 162 func (i int64ColumnIndex) NumPages() int { return 1 } 163 func (i int64ColumnIndex) NullCount(int) int64 { return 0 } 164 func (i int64ColumnIndex) NullPage(int) bool { return false } 165 func (i int64ColumnIndex) MinValue(int) Value { return makeValueInt64(i.page.min()) } 166 func (i int64ColumnIndex) MaxValue(int) Value { return makeValueInt64(i.page.max()) } 167 func (i int64ColumnIndex) IsAscending() bool { return false } 168 func (i int64ColumnIndex) IsDescending() bool { return false } 169 170 type int96ColumnIndex struct{ page *int96Page } 171 172 func (i int96ColumnIndex) NumPages() int { return 1 } 173 func (i int96ColumnIndex) NullCount(int) int64 { return 0 } 174 func (i int96ColumnIndex) NullPage(int) bool { return false } 175 func (i int96ColumnIndex) MinValue(int) Value { return makeValueInt96(i.page.min()) } 176 func (i int96ColumnIndex) MaxValue(int) Value { return makeValueInt96(i.page.max()) } 177 func (i int96ColumnIndex) IsAscending() bool { return false } 178 func (i int96ColumnIndex) IsDescending() bool { return false } 179 180 type floatColumnIndex struct{ page *floatPage } 181 182 func (i floatColumnIndex) NumPages() int { return 1 } 183 func (i floatColumnIndex) NullCount(int) int64 { return 0 } 184 func (i floatColumnIndex) NullPage(int) bool { return false } 185 func (i floatColumnIndex) MinValue(int) Value { return makeValueFloat(i.page.min()) } 186 func (i floatColumnIndex) MaxValue(int) Value { return makeValueFloat(i.page.max()) } 187 func (i floatColumnIndex) IsAscending() bool { return false } 188 func (i floatColumnIndex) IsDescending() bool { return false } 189 190 type doubleColumnIndex struct{ page *doublePage } 191 192 func (i doubleColumnIndex) NumPages() int { return 1 } 193 func (i doubleColumnIndex) NullCount(int) int64 { return 0 } 194 func (i doubleColumnIndex) NullPage(int) bool { return false } 195 func (i doubleColumnIndex) MinValue(int) Value { return makeValueDouble(i.page.min()) } 196 func (i doubleColumnIndex) MaxValue(int) Value { return makeValueDouble(i.page.max()) } 197 func (i doubleColumnIndex) IsAscending() bool { return false } 198 func (i doubleColumnIndex) IsDescending() bool { return false } 199 200 type byteArrayColumnIndex struct{ page *byteArrayPage } 201 202 func (i byteArrayColumnIndex) NumPages() int { return 1 } 203 func (i byteArrayColumnIndex) NullCount(int) int64 { return 0 } 204 func (i byteArrayColumnIndex) NullPage(int) bool { return false } 205 func (i byteArrayColumnIndex) MinValue(int) Value { return makeValueBytes(ByteArray, i.page.min()) } 206 func (i byteArrayColumnIndex) MaxValue(int) Value { return makeValueBytes(ByteArray, i.page.max()) } 207 func (i byteArrayColumnIndex) IsAscending() bool { return false } 208 func (i byteArrayColumnIndex) IsDescending() bool { return false } 209 210 type fixedLenByteArrayColumnIndex struct{ page *fixedLenByteArrayPage } 211 212 func (i fixedLenByteArrayColumnIndex) NumPages() int { return 1 } 213 func (i fixedLenByteArrayColumnIndex) NullCount(int) int64 { return 0 } 214 func (i fixedLenByteArrayColumnIndex) NullPage(int) bool { return false } 215 func (i fixedLenByteArrayColumnIndex) MinValue(int) Value { 216 return makeValueBytes(FixedLenByteArray, i.page.min()) 217 } 218 func (i fixedLenByteArrayColumnIndex) MaxValue(int) Value { 219 return makeValueBytes(FixedLenByteArray, i.page.max()) 220 } 221 func (i fixedLenByteArrayColumnIndex) IsAscending() bool { return false } 222 func (i fixedLenByteArrayColumnIndex) IsDescending() bool { return false } 223 224 type uint32ColumnIndex struct{ page *uint32Page } 225 226 func (i uint32ColumnIndex) NumPages() int { return 1 } 227 func (i uint32ColumnIndex) NullCount(int) int64 { return 0 } 228 func (i uint32ColumnIndex) NullPage(int) bool { return false } 229 func (i uint32ColumnIndex) MinValue(int) Value { return makeValueUint32(i.page.min()) } 230 func (i uint32ColumnIndex) MaxValue(int) Value { return makeValueUint32(i.page.max()) } 231 func (i uint32ColumnIndex) IsAscending() bool { return false } 232 func (i uint32ColumnIndex) IsDescending() bool { return false } 233 234 type uint64ColumnIndex struct{ page *uint64Page } 235 236 func (i uint64ColumnIndex) NumPages() int { return 1 } 237 func (i uint64ColumnIndex) NullCount(int) int64 { return 0 } 238 func (i uint64ColumnIndex) NullPage(int) bool { return false } 239 func (i uint64ColumnIndex) MinValue(int) Value { return makeValueUint64(i.page.min()) } 240 func (i uint64ColumnIndex) MaxValue(int) Value { return makeValueUint64(i.page.max()) } 241 func (i uint64ColumnIndex) IsAscending() bool { return false } 242 func (i uint64ColumnIndex) IsDescending() bool { return false } 243 244 type be128ColumnIndex struct{ page *be128Page } 245 246 func (i be128ColumnIndex) NumPages() int { return 1 } 247 func (i be128ColumnIndex) NullCount(int) int64 { return 0 } 248 func (i be128ColumnIndex) NullPage(int) bool { return false } 249 func (i be128ColumnIndex) MinValue(int) Value { return makeValueBytes(FixedLenByteArray, i.page.min()) } 250 func (i be128ColumnIndex) MaxValue(int) Value { return makeValueBytes(FixedLenByteArray, i.page.max()) } 251 func (i be128ColumnIndex) IsAscending() bool { return false } 252 func (i be128ColumnIndex) IsDescending() bool { return false } 253 254 // The ColumnIndexer interface is implemented by types that support generating 255 // parquet column indexes. 256 // 257 // The package does not export any types that implement this interface, programs 258 // must call NewColumnIndexer on a Type instance to construct column indexers. 259 type ColumnIndexer interface { 260 // Resets the column indexer state. 261 Reset() 262 263 // Add a page to the column indexer. 264 IndexPage(numValues, numNulls int64, min, max Value) 265 266 // Generates a format.ColumnIndex value from the current state of the 267 // column indexer. 268 // 269 // The returned value may reference internal buffers, in which case the 270 // values remain valid until the next call to IndexPage or Reset on the 271 // column indexer. 272 ColumnIndex() format.ColumnIndex 273 } 274 275 type baseColumnIndexer struct { 276 nullPages []bool 277 nullCounts []int64 278 } 279 280 func (i *baseColumnIndexer) reset() { 281 i.nullPages = i.nullPages[:0] 282 i.nullCounts = i.nullCounts[:0] 283 } 284 285 func (i *baseColumnIndexer) observe(numValues, numNulls int64) { 286 i.nullPages = append(i.nullPages, numValues == numNulls) 287 i.nullCounts = append(i.nullCounts, numNulls) 288 } 289 290 func (i *baseColumnIndexer) columnIndex(minValues, maxValues [][]byte, minOrder, maxOrder int) format.ColumnIndex { 291 return format.ColumnIndex{ 292 NullPages: i.nullPages, 293 NullCounts: i.nullCounts, 294 MinValues: minValues, 295 MaxValues: maxValues, 296 BoundaryOrder: boundaryOrderOf(minOrder, maxOrder), 297 } 298 } 299 300 type booleanColumnIndexer struct { 301 baseColumnIndexer 302 minValues []bool 303 maxValues []bool 304 } 305 306 func newBooleanColumnIndexer() *booleanColumnIndexer { 307 return new(booleanColumnIndexer) 308 } 309 310 func (i *booleanColumnIndexer) Reset() { 311 i.reset() 312 i.minValues = i.minValues[:0] 313 i.maxValues = i.maxValues[:0] 314 } 315 316 func (i *booleanColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { 317 i.observe(numValues, numNulls) 318 i.minValues = append(i.minValues, min.Boolean()) 319 i.maxValues = append(i.maxValues, max.Boolean()) 320 } 321 322 func (i *booleanColumnIndexer) ColumnIndex() format.ColumnIndex { 323 return i.columnIndex( 324 splitFixedLenByteArrays(unsafecast.BoolToBytes(i.minValues), 1), 325 splitFixedLenByteArrays(unsafecast.BoolToBytes(i.maxValues), 1), 326 orderOfBool(i.minValues), 327 orderOfBool(i.maxValues), 328 ) 329 } 330 331 type int32ColumnIndexer struct { 332 baseColumnIndexer 333 minValues []int32 334 maxValues []int32 335 } 336 337 func newInt32ColumnIndexer() *int32ColumnIndexer { 338 return new(int32ColumnIndexer) 339 } 340 341 func (i *int32ColumnIndexer) Reset() { 342 i.reset() 343 i.minValues = i.minValues[:0] 344 i.maxValues = i.maxValues[:0] 345 } 346 347 func (i *int32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { 348 i.observe(numValues, numNulls) 349 i.minValues = append(i.minValues, min.Int32()) 350 i.maxValues = append(i.maxValues, max.Int32()) 351 } 352 353 func (i *int32ColumnIndexer) ColumnIndex() format.ColumnIndex { 354 return i.columnIndex( 355 splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.minValues), 4), 356 splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.maxValues), 4), 357 orderOfInt32(i.minValues), 358 orderOfInt32(i.maxValues), 359 ) 360 } 361 362 type int64ColumnIndexer struct { 363 baseColumnIndexer 364 minValues []int64 365 maxValues []int64 366 } 367 368 func newInt64ColumnIndexer() *int64ColumnIndexer { 369 return new(int64ColumnIndexer) 370 } 371 372 func (i *int64ColumnIndexer) Reset() { 373 i.reset() 374 i.minValues = i.minValues[:0] 375 i.maxValues = i.maxValues[:0] 376 } 377 378 func (i *int64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { 379 i.observe(numValues, numNulls) 380 i.minValues = append(i.minValues, min.Int64()) 381 i.maxValues = append(i.maxValues, max.Int64()) 382 } 383 384 func (i *int64ColumnIndexer) ColumnIndex() format.ColumnIndex { 385 return i.columnIndex( 386 splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.minValues), 8), 387 splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.maxValues), 8), 388 orderOfInt64(i.minValues), 389 orderOfInt64(i.maxValues), 390 ) 391 } 392 393 type int96ColumnIndexer struct { 394 baseColumnIndexer 395 minValues []deprecated.Int96 396 maxValues []deprecated.Int96 397 } 398 399 func newInt96ColumnIndexer() *int96ColumnIndexer { 400 return new(int96ColumnIndexer) 401 } 402 403 func (i *int96ColumnIndexer) Reset() { 404 i.reset() 405 i.minValues = i.minValues[:0] 406 i.maxValues = i.maxValues[:0] 407 } 408 409 func (i *int96ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { 410 i.observe(numValues, numNulls) 411 i.minValues = append(i.minValues, min.Int96()) 412 i.maxValues = append(i.maxValues, max.Int96()) 413 } 414 415 func (i *int96ColumnIndexer) ColumnIndex() format.ColumnIndex { 416 return i.columnIndex( 417 splitFixedLenByteArrays(deprecated.Int96ToBytes(i.minValues), 12), 418 splitFixedLenByteArrays(deprecated.Int96ToBytes(i.maxValues), 12), 419 deprecated.OrderOfInt96(i.minValues), 420 deprecated.OrderOfInt96(i.maxValues), 421 ) 422 } 423 424 type floatColumnIndexer struct { 425 baseColumnIndexer 426 minValues []float32 427 maxValues []float32 428 } 429 430 func newFloatColumnIndexer() *floatColumnIndexer { 431 return new(floatColumnIndexer) 432 } 433 434 func (i *floatColumnIndexer) Reset() { 435 i.reset() 436 i.minValues = i.minValues[:0] 437 i.maxValues = i.maxValues[:0] 438 } 439 440 func (i *floatColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { 441 i.observe(numValues, numNulls) 442 i.minValues = append(i.minValues, min.Float()) 443 i.maxValues = append(i.maxValues, max.Float()) 444 } 445 446 func (i *floatColumnIndexer) ColumnIndex() format.ColumnIndex { 447 return i.columnIndex( 448 splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.minValues), 4), 449 splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.maxValues), 4), 450 orderOfFloat32(i.minValues), 451 orderOfFloat32(i.maxValues), 452 ) 453 } 454 455 type doubleColumnIndexer struct { 456 baseColumnIndexer 457 minValues []float64 458 maxValues []float64 459 } 460 461 func newDoubleColumnIndexer() *doubleColumnIndexer { 462 return new(doubleColumnIndexer) 463 } 464 465 func (i *doubleColumnIndexer) Reset() { 466 i.reset() 467 i.minValues = i.minValues[:0] 468 i.maxValues = i.maxValues[:0] 469 } 470 471 func (i *doubleColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { 472 i.observe(numValues, numNulls) 473 i.minValues = append(i.minValues, min.Double()) 474 i.maxValues = append(i.maxValues, max.Double()) 475 } 476 477 func (i *doubleColumnIndexer) ColumnIndex() format.ColumnIndex { 478 return i.columnIndex( 479 splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.minValues), 8), 480 splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.maxValues), 8), 481 orderOfFloat64(i.minValues), 482 orderOfFloat64(i.maxValues), 483 ) 484 } 485 486 type byteArrayColumnIndexer struct { 487 baseColumnIndexer 488 sizeLimit int 489 minValues []byte 490 maxValues []byte 491 } 492 493 func newByteArrayColumnIndexer(sizeLimit int) *byteArrayColumnIndexer { 494 return &byteArrayColumnIndexer{sizeLimit: sizeLimit} 495 } 496 497 func (i *byteArrayColumnIndexer) Reset() { 498 i.reset() 499 i.minValues = i.minValues[:0] 500 i.maxValues = i.maxValues[:0] 501 } 502 503 func (i *byteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { 504 i.observe(numValues, numNulls) 505 minValue := min.ByteArray() 506 maxValue := max.ByteArray() 507 if i.sizeLimit > 0 { 508 minValue = truncateLargeMinByteArrayValue(minValue, i.sizeLimit) 509 maxValue = truncateLargeMaxByteArrayValue(maxValue, i.sizeLimit) 510 } 511 i.minValues = plain.AppendByteArray(i.minValues, minValue) 512 i.maxValues = plain.AppendByteArray(i.maxValues, maxValue) 513 } 514 515 func (i *byteArrayColumnIndexer) ColumnIndex() format.ColumnIndex { 516 minValues := splitByteArrays(i.minValues) 517 maxValues := splitByteArrays(i.maxValues) 518 return i.columnIndex( 519 minValues, 520 maxValues, 521 orderOfBytes(minValues), 522 orderOfBytes(maxValues), 523 ) 524 } 525 526 type fixedLenByteArrayColumnIndexer struct { 527 baseColumnIndexer 528 size int 529 sizeLimit int 530 minValues []byte 531 maxValues []byte 532 } 533 534 func newFixedLenByteArrayColumnIndexer(size, sizeLimit int) *fixedLenByteArrayColumnIndexer { 535 return &fixedLenByteArrayColumnIndexer{ 536 size: size, 537 sizeLimit: sizeLimit, 538 } 539 } 540 541 func (i *fixedLenByteArrayColumnIndexer) Reset() { 542 i.reset() 543 i.minValues = i.minValues[:0] 544 i.maxValues = i.maxValues[:0] 545 } 546 547 func (i *fixedLenByteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { 548 i.observe(numValues, numNulls) 549 i.minValues = append(i.minValues, min.ByteArray()...) 550 i.maxValues = append(i.maxValues, max.ByteArray()...) 551 } 552 553 func (i *fixedLenByteArrayColumnIndexer) ColumnIndex() format.ColumnIndex { 554 minValues := splitFixedLenByteArrays(i.minValues, i.size) 555 maxValues := splitFixedLenByteArrays(i.maxValues, i.size) 556 if sizeLimit := i.sizeLimit; sizeLimit > 0 { 557 for i, v := range minValues { 558 minValues[i] = truncateLargeMinByteArrayValue(v, sizeLimit) 559 } 560 for i, v := range maxValues { 561 maxValues[i] = truncateLargeMaxByteArrayValue(v, sizeLimit) 562 } 563 } 564 return i.columnIndex( 565 minValues, 566 maxValues, 567 orderOfBytes(minValues), 568 orderOfBytes(maxValues), 569 ) 570 } 571 572 type uint32ColumnIndexer struct { 573 baseColumnIndexer 574 minValues []uint32 575 maxValues []uint32 576 } 577 578 func newUint32ColumnIndexer() *uint32ColumnIndexer { 579 return new(uint32ColumnIndexer) 580 } 581 582 func (i *uint32ColumnIndexer) Reset() { 583 i.reset() 584 i.minValues = i.minValues[:0] 585 i.maxValues = i.maxValues[:0] 586 } 587 588 func (i *uint32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { 589 i.observe(numValues, numNulls) 590 i.minValues = append(i.minValues, min.Uint32()) 591 i.maxValues = append(i.maxValues, max.Uint32()) 592 } 593 594 func (i *uint32ColumnIndexer) ColumnIndex() format.ColumnIndex { 595 return i.columnIndex( 596 splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.minValues), 4), 597 splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.maxValues), 4), 598 orderOfUint32(i.minValues), 599 orderOfUint32(i.maxValues), 600 ) 601 } 602 603 type uint64ColumnIndexer struct { 604 baseColumnIndexer 605 minValues []uint64 606 maxValues []uint64 607 } 608 609 func newUint64ColumnIndexer() *uint64ColumnIndexer { 610 return new(uint64ColumnIndexer) 611 } 612 613 func (i *uint64ColumnIndexer) Reset() { 614 i.reset() 615 i.minValues = i.minValues[:0] 616 i.maxValues = i.maxValues[:0] 617 } 618 619 func (i *uint64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { 620 i.observe(numValues, numNulls) 621 i.minValues = append(i.minValues, min.Uint64()) 622 i.maxValues = append(i.maxValues, max.Uint64()) 623 } 624 625 func (i *uint64ColumnIndexer) ColumnIndex() format.ColumnIndex { 626 return i.columnIndex( 627 splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.minValues), 8), 628 splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.maxValues), 8), 629 orderOfUint64(i.minValues), 630 orderOfUint64(i.maxValues), 631 ) 632 } 633 634 type be128ColumnIndexer struct { 635 baseColumnIndexer 636 minValues [][16]byte 637 maxValues [][16]byte 638 } 639 640 func newBE128ColumnIndexer() *be128ColumnIndexer { 641 return new(be128ColumnIndexer) 642 } 643 644 func (i *be128ColumnIndexer) Reset() { 645 i.reset() 646 i.minValues = i.minValues[:0] 647 i.maxValues = i.maxValues[:0] 648 } 649 650 func (i *be128ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { 651 i.observe(numValues, numNulls) 652 if !min.IsNull() { 653 i.minValues = append(i.minValues, *(*[16]byte)(min.ByteArray())) 654 } 655 if !max.IsNull() { 656 i.maxValues = append(i.maxValues, *(*[16]byte)(max.ByteArray())) 657 } 658 } 659 660 func (i *be128ColumnIndexer) ColumnIndex() format.ColumnIndex { 661 minValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.minValues), 16) 662 maxValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.maxValues), 16) 663 return i.columnIndex( 664 minValues, 665 maxValues, 666 orderOfBytes(minValues), 667 orderOfBytes(maxValues), 668 ) 669 } 670 671 func truncateLargeMinByteArrayValue(value []byte, sizeLimit int) []byte { 672 if len(value) > sizeLimit { 673 value = value[:sizeLimit] 674 } 675 return value 676 } 677 678 func truncateLargeMaxByteArrayValue(value []byte, sizeLimit int) []byte { 679 if len(value) > sizeLimit && !isMaxByteArrayValue(value) { 680 value = value[:sizeLimit] 681 } 682 return value 683 } 684 685 func isMaxByteArrayValue(value []byte) bool { 686 for i := range value { 687 if value[i] != 0xFF { 688 return false 689 } 690 } 691 return true 692 } 693 694 func splitByteArrays(data []byte) [][]byte { 695 length := 0 696 plain.RangeByteArray(data, func([]byte) error { 697 length++ 698 return nil 699 }) 700 buffer := make([]byte, 0, len(data)-(4*length)) 701 values := make([][]byte, 0, length) 702 plain.RangeByteArray(data, func(value []byte) error { 703 offset := len(buffer) 704 buffer = append(buffer, value...) 705 values = append(values, buffer[offset:]) 706 return nil 707 }) 708 return values 709 } 710 711 func splitFixedLenByteArrays(data []byte, size int) [][]byte { 712 data = copyBytes(data) 713 values := make([][]byte, len(data)/size) 714 for i := range values { 715 j := (i + 0) * size 716 k := (i + 1) * size 717 values[i] = data[j:k:k] 718 } 719 return values 720 } 721 722 func boundaryOrderOf(minOrder, maxOrder int) format.BoundaryOrder { 723 if minOrder == maxOrder { 724 switch { 725 case minOrder > 0: 726 return format.Ascending 727 case minOrder < 0: 728 return format.Descending 729 } 730 } 731 return format.Unordered 732 }