github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/dictionary.go (about) 1 package parquet 2 3 import ( 4 "io" 5 "math/bits" 6 "unsafe" 7 8 "github.com/vc42/parquet-go/deprecated" 9 "github.com/vc42/parquet-go/encoding" 10 "github.com/vc42/parquet-go/encoding/plain" 11 "github.com/vc42/parquet-go/hashprobe" 12 "github.com/vc42/parquet-go/internal/bitpack" 13 "github.com/vc42/parquet-go/internal/unsafecast" 14 "github.com/vc42/parquet-go/sparse" 15 ) 16 17 const ( 18 // Maximum load of probing tables. This parameter configures the balance 19 // between memory density and compute time of probing operations. Valid 20 // values are floating point numbers between 0 and 1. 21 // 22 // Smaller values result in lower collision probability when inserting 23 // values in probing tables, but also increase memory utilization. 24 // 25 // TODO: make this configurable by the application? 26 hashprobeTableMaxLoad = 0.85 27 28 // An estimate of the CPU cache footprint used by insert operations. 29 // 30 // This constant is used to determine a useful chunk size depending on the 31 // size of values being inserted in dictionaries. More values of small size 32 // can fit in CPU caches, so the inserts can operation on larger chunks. 33 insertsTargetCacheFootprint = 8192 34 ) 35 36 // The Dictionary interface represents type-specific implementations of parquet 37 // dictionaries. 38 // 39 // Programs can instantiate dictionaries by call the NewDictionary method of a 40 // Type object. 41 // 42 // The current implementation has a limitation which prevents applications from 43 // providing custom versions of this interface because it contains unexported 44 // methods. The only way to create Dictionary values is to call the 45 // NewDictionary of Type instances. This limitation may be lifted in future 46 // releases. 47 type Dictionary interface { 48 // Returns the type that the dictionary was created from. 
49 Type() Type 50 51 // Returns the number of value indexed in the dictionary. 52 Len() int 53 54 // Returns the dictionary value at the given index. 55 Index(index int32) Value 56 57 // Inserts values from the second slice to the dictionary and writes the 58 // indexes at which each value was inserted to the first slice. 59 // 60 // The method panics if the length of the indexes slice is smaller than the 61 // length of the values slice. 62 Insert(indexes []int32, values []Value) 63 64 // Given an array of dictionary indexes, lookup the values into the array 65 // of values passed as second argument. 66 // 67 // The method panics if len(indexes) > len(values), or one of the indexes 68 // is negative or greater than the highest index in the dictionary. 69 Lookup(indexes []int32, values []Value) 70 71 // Returns the min and max values found in the given indexes. 72 Bounds(indexes []int32) (min, max Value) 73 74 // Resets the dictionary to its initial state, removing all values. 75 Reset() 76 77 // Returns a BufferedPage representing the content of the dictionary. 78 // 79 // The returned page shares the underlying memory of the buffer, it remains 80 // valid to use until the dictionary's Reset method is called. 81 Page() BufferedPage 82 83 // See ColumnBuffer.writeValues for details on the use of unexported methods 84 // on interfaces. 85 insert(indexes []int32, rows sparse.Array) 86 //lookup(indexes []int32, rows sparse.Array) 87 } 88 89 func checkLookupIndexBounds(indexes []int32, rows sparse.Array) { 90 if rows.Len() < len(indexes) { 91 panic("dictionary lookup with more indexes than values") 92 } 93 } 94 95 // The boolean dictionary always contains two values for true and false. 96 type booleanDictionary struct { 97 booleanPage 98 // There are only two possible values for booleans, false and true. 99 // Rather than using a Go map, we track the indexes of each values 100 // in an array of two 32 bits integers. 
When inserting values in the 101 // dictionary, we ensure that an index exist for each boolean value, 102 // then use the value 0 or 1 (false or true) to perform a lookup in 103 // the dictionary's map. 104 table [2]int32 105 } 106 107 func newBooleanDictionary(typ Type, columnIndex int16, numValues int32, values []byte) *booleanDictionary { 108 indexOfFalse, indexOfTrue := int32(-1), int32(-1) 109 110 for i := int32(0); i < numValues && indexOfFalse < 0 && indexOfTrue < 0; i += 8 { 111 v := values[i] 112 if v != 0x00 { 113 indexOfTrue = i + int32(bits.TrailingZeros8(v)) 114 } 115 if v != 0xFF { 116 indexOfFalse = i + int32(bits.TrailingZeros8(^v)) 117 } 118 } 119 120 return &booleanDictionary{ 121 booleanPage: booleanPage{ 122 typ: typ, 123 bits: values[:bitpack.ByteCount(uint(numValues))], 124 numValues: numValues, 125 columnIndex: ^columnIndex, 126 }, 127 table: [2]int32{ 128 0: indexOfFalse, 129 1: indexOfTrue, 130 }, 131 } 132 } 133 134 func (d *booleanDictionary) Type() Type { return newIndexedType(d.typ, d) } 135 136 func (d *booleanDictionary) Len() int { return int(d.numValues) } 137 138 func (d *booleanDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 139 140 func (d *booleanDictionary) index(i int32) bool { return d.valueAt(int(i)) } 141 142 func (d *booleanDictionary) Insert(indexes []int32, values []Value) { 143 model := Value{} 144 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 145 } 146 147 func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) { 148 _ = indexes[:rows.Len()] 149 150 if d.table[0] < 0 { 151 d.table[0] = d.numValues 152 d.numValues++ 153 d.bits = plain.AppendBoolean(d.bits, int(d.table[0]), false) 154 } 155 156 if d.table[1] < 0 { 157 d.table[1] = d.numValues 158 d.numValues++ 159 d.bits = plain.AppendBoolean(d.bits, int(d.table[1]), true) 160 } 161 162 dict := d.table 163 164 for i := 0; i < rows.Len(); i++ { 165 v := *(*byte)(rows.Index(i)) & 1 166 indexes[i] = dict[v] 167 } 
168 } 169 170 func (d *booleanDictionary) Lookup(indexes []int32, values []Value) { 171 model := d.makeValue(false) 172 memsetValues(values, model) 173 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 174 } 175 176 func (d *booleanDictionary) lookup(indexes []int32, rows sparse.Array) { 177 checkLookupIndexBounds(indexes, rows) 178 for i, j := range indexes { 179 *(*bool)(rows.Index(i)) = d.index(j) 180 } 181 } 182 183 func (d *booleanDictionary) Bounds(indexes []int32) (min, max Value) { 184 if len(indexes) > 0 { 185 hasFalse, hasTrue := false, false 186 187 for _, i := range indexes { 188 v := d.index(i) 189 if v { 190 hasTrue = true 191 } else { 192 hasFalse = true 193 } 194 if hasTrue && hasFalse { 195 break 196 } 197 } 198 199 min = d.makeValue(!hasFalse) 200 max = d.makeValue(hasTrue) 201 } 202 return min, max 203 } 204 205 func (d *booleanDictionary) Reset() { 206 d.bits = d.bits[:0] 207 d.offset = 0 208 d.numValues = 0 209 d.table = [2]int32{-1, -1} 210 } 211 212 func (d *booleanDictionary) Page() BufferedPage { 213 return &d.booleanPage 214 } 215 216 type int32Dictionary struct { 217 int32Page 218 table *hashprobe.Int32Table 219 } 220 221 func newInt32Dictionary(typ Type, columnIndex int16, numValues int32, values []byte) *int32Dictionary { 222 return &int32Dictionary{ 223 int32Page: int32Page{ 224 typ: typ, 225 values: unsafecast.BytesToInt32(values)[:numValues], 226 columnIndex: ^columnIndex, 227 }, 228 } 229 } 230 231 func (d *int32Dictionary) Type() Type { return newIndexedType(d.typ, d) } 232 233 func (d *int32Dictionary) Len() int { return len(d.values) } 234 235 func (d *int32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 236 237 func (d *int32Dictionary) index(i int32) int32 { return d.values[i] } 238 239 func (d *int32Dictionary) Insert(indexes []int32, values []Value) { 240 model := Value{} 241 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 242 } 243 244 func (d *int32Dictionary) 
init(indexes []int32) { 245 d.table = hashprobe.NewInt32Table(cap(d.values), hashprobeTableMaxLoad) 246 247 n := min(len(d.values), len(indexes)) 248 249 for i := 0; i < len(d.values); i += n { 250 j := min(i+n, len(d.values)) 251 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 252 } 253 } 254 255 func (d *int32Dictionary) insert(indexes []int32, rows sparse.Array) { 256 // Iterating over the input in chunks helps keep relevant data in CPU 257 // caches when a large number of values are inserted into the dictionary with 258 // a single method call. 259 // 260 // Without this chunking, memory areas from the head of the indexes and 261 // values arrays end up being evicted from CPU caches as the probing 262 // operation iterates through the array. The subsequent scan of the indexes 263 // required to determine which values must be inserted into the page then 264 // stalls on retrieving data from main memory. 265 // 266 // We measured as much as ~37% drop in throughput when disabling the 267 // chunking, and did not observe any penalties from having it on smaller 268 // inserts. 
269 const chunkSize = insertsTargetCacheFootprint / 4 270 271 if d.table == nil { 272 d.init(indexes) 273 } 274 275 values := rows.Int32Array() 276 277 for i := 0; i < values.Len(); i += chunkSize { 278 j := min(i+chunkSize, values.Len()) 279 280 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 281 for k, index := range indexes[i:j] { 282 if index == int32(len(d.values)) { 283 d.values = append(d.values, values.Index(i+k)) 284 } 285 } 286 } 287 } 288 } 289 290 func (d *int32Dictionary) Lookup(indexes []int32, values []Value) { 291 model := d.makeValue(0) 292 memsetValues(values, model) 293 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 294 } 295 296 func (d *int32Dictionary) Bounds(indexes []int32) (min, max Value) { 297 if len(indexes) > 0 { 298 minValue, maxValue := d.bounds(indexes) 299 min = d.makeValue(minValue) 300 max = d.makeValue(maxValue) 301 } 302 return min, max 303 } 304 305 func (d *int32Dictionary) Reset() { 306 d.values = d.values[:0] 307 if d.table != nil { 308 d.table.Reset() 309 } 310 } 311 312 func (d *int32Dictionary) Page() BufferedPage { 313 return &d.int32Page 314 } 315 316 type int64Dictionary struct { 317 int64Page 318 table *hashprobe.Int64Table 319 } 320 321 func newInt64Dictionary(typ Type, columnIndex int16, numValues int32, values []byte) *int64Dictionary { 322 return &int64Dictionary{ 323 int64Page: int64Page{ 324 typ: typ, 325 values: unsafecast.BytesToInt64(values)[:numValues], 326 columnIndex: ^columnIndex, 327 }, 328 } 329 } 330 331 func (d *int64Dictionary) Type() Type { return newIndexedType(d.typ, d) } 332 333 func (d *int64Dictionary) Len() int { return len(d.values) } 334 335 func (d *int64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 336 337 func (d *int64Dictionary) index(i int32) int64 { return d.values[i] } 338 339 func (d *int64Dictionary) Insert(indexes []int32, values []Value) { 340 model := Value{} 341 d.insert(indexes, makeArrayValue(values, 
unsafe.Offsetof(model.u64))) 342 } 343 344 func (d *int64Dictionary) init(indexes []int32) { 345 d.table = hashprobe.NewInt64Table(cap(d.values), hashprobeTableMaxLoad) 346 347 n := min(len(d.values), len(indexes)) 348 349 for i := 0; i < len(d.values); i += n { 350 j := min(i+n, len(d.values)) 351 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 352 } 353 } 354 355 func (d *int64Dictionary) insert(indexes []int32, rows sparse.Array) { 356 const chunkSize = insertsTargetCacheFootprint / 8 357 358 if d.table == nil { 359 d.init(indexes) 360 } 361 362 values := rows.Int64Array() 363 364 for i := 0; i < values.Len(); i += chunkSize { 365 j := min(i+chunkSize, values.Len()) 366 367 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 368 for k, index := range indexes[i:j] { 369 if index == int32(len(d.values)) { 370 d.values = append(d.values, values.Index(i+k)) 371 } 372 } 373 } 374 } 375 } 376 377 func (d *int64Dictionary) Lookup(indexes []int32, values []Value) { 378 model := d.makeValue(0) 379 memsetValues(values, model) 380 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 381 } 382 383 func (d *int64Dictionary) Bounds(indexes []int32) (min, max Value) { 384 if len(indexes) > 0 { 385 minValue, maxValue := d.bounds(indexes) 386 min = d.makeValue(minValue) 387 max = d.makeValue(maxValue) 388 } 389 return min, max 390 } 391 392 func (d *int64Dictionary) Reset() { 393 d.values = d.values[:0] 394 if d.table != nil { 395 d.table.Reset() 396 } 397 } 398 399 func (d *int64Dictionary) Page() BufferedPage { 400 return &d.int64Page 401 } 402 403 type int96Dictionary struct { 404 int96Page 405 hashmap map[deprecated.Int96]int32 406 } 407 408 func newInt96Dictionary(typ Type, columnIndex int16, numValues int32, values []byte) *int96Dictionary { 409 return &int96Dictionary{ 410 int96Page: int96Page{ 411 typ: typ, 412 values: deprecated.BytesToInt96(values)[:numValues], 413 columnIndex: ^columnIndex, 414 }, 415 } 416 } 417 418 func (d 
*int96Dictionary) Type() Type { return newIndexedType(d.typ, d) } 419 420 func (d *int96Dictionary) Len() int { return len(d.values) } 421 422 func (d *int96Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 423 424 func (d *int96Dictionary) index(i int32) deprecated.Int96 { return d.values[i] } 425 426 func (d *int96Dictionary) Insert(indexes []int32, values []Value) { 427 d.insertValues(indexes, len(values), func(i int) deprecated.Int96 { 428 return values[i].Int96() 429 }) 430 } 431 432 func (d *int96Dictionary) insert(indexes []int32, rows sparse.Array) { 433 d.insertValues(indexes, rows.Len(), func(i int) deprecated.Int96 { 434 return *(*deprecated.Int96)(rows.Index(i)) 435 }) 436 } 437 438 func (d *int96Dictionary) insertValues(indexes []int32, count int, valueAt func(int) deprecated.Int96) { 439 _ = indexes[:count] 440 441 if d.hashmap == nil { 442 d.hashmap = make(map[deprecated.Int96]int32, cap(d.values)) 443 for i, v := range d.values { 444 d.hashmap[v] = int32(i) 445 } 446 } 447 448 for i := 0; i < count; i++ { 449 value := valueAt(i) 450 451 index, exists := d.hashmap[value] 452 if !exists { 453 index = int32(len(d.values)) 454 d.values = append(d.values, value) 455 d.hashmap[value] = index 456 } 457 458 indexes[i] = index 459 } 460 } 461 462 func (d *int96Dictionary) Lookup(indexes []int32, values []Value) { 463 for i, j := range indexes { 464 values[i] = d.Index(j) 465 } 466 } 467 468 func (d *int96Dictionary) Bounds(indexes []int32) (min, max Value) { 469 if len(indexes) > 0 { 470 minValue := d.index(indexes[0]) 471 maxValue := minValue 472 473 for _, i := range indexes[1:] { 474 value := d.index(i) 475 switch { 476 case value.Less(minValue): 477 minValue = value 478 case maxValue.Less(value): 479 maxValue = value 480 } 481 } 482 483 min = d.makeValue(minValue) 484 max = d.makeValue(maxValue) 485 } 486 return min, max 487 } 488 489 func (d *int96Dictionary) Reset() { 490 d.values = d.values[:0] 491 d.hashmap = nil 492 } 493 494 func 
(d *int96Dictionary) Page() BufferedPage { 495 return &d.int96Page 496 } 497 498 type floatDictionary struct { 499 floatPage 500 table *hashprobe.Float32Table 501 } 502 503 func newFloatDictionary(typ Type, columnIndex int16, numValues int32, values []byte) *floatDictionary { 504 return &floatDictionary{ 505 floatPage: floatPage{ 506 typ: typ, 507 values: unsafecast.BytesToFloat32(values)[:numValues], 508 columnIndex: ^columnIndex, 509 }, 510 } 511 } 512 513 func (d *floatDictionary) Type() Type { return newIndexedType(d.typ, d) } 514 515 func (d *floatDictionary) Len() int { return len(d.values) } 516 517 func (d *floatDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 518 519 func (d *floatDictionary) index(i int32) float32 { return d.values[i] } 520 521 func (d *floatDictionary) Insert(indexes []int32, values []Value) { 522 model := Value{} 523 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 524 } 525 526 func (d *floatDictionary) init(indexes []int32) { 527 d.table = hashprobe.NewFloat32Table(cap(d.values), hashprobeTableMaxLoad) 528 529 n := min(len(d.values), len(indexes)) 530 531 for i := 0; i < len(d.values); i += n { 532 j := min(i+n, len(d.values)) 533 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 534 } 535 } 536 537 func (d *floatDictionary) insert(indexes []int32, rows sparse.Array) { 538 const chunkSize = insertsTargetCacheFootprint / 4 539 540 if d.table == nil { 541 d.init(indexes) 542 } 543 544 values := rows.Float32Array() 545 546 for i := 0; i < values.Len(); i += chunkSize { 547 j := min(i+chunkSize, values.Len()) 548 549 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 550 for k, index := range indexes[i:j] { 551 if index == int32(len(d.values)) { 552 d.values = append(d.values, values.Index(i+k)) 553 } 554 } 555 } 556 } 557 } 558 559 func (d *floatDictionary) Lookup(indexes []int32, values []Value) { 560 model := d.makeValue(0) 561 memsetValues(values, model) 562 d.lookup(indexes, 
makeArrayValue(values, unsafe.Offsetof(model.u64))) 563 } 564 565 func (d *floatDictionary) Bounds(indexes []int32) (min, max Value) { 566 if len(indexes) > 0 { 567 minValue, maxValue := d.bounds(indexes) 568 min = d.makeValue(minValue) 569 max = d.makeValue(maxValue) 570 } 571 return min, max 572 } 573 574 func (d *floatDictionary) Reset() { 575 d.values = d.values[:0] 576 if d.table != nil { 577 d.table.Reset() 578 } 579 } 580 581 func (d *floatDictionary) Page() BufferedPage { 582 return &d.floatPage 583 } 584 585 type doubleDictionary struct { 586 doublePage 587 table *hashprobe.Float64Table 588 } 589 590 func newDoubleDictionary(typ Type, columnIndex int16, numValues int32, values []byte) *doubleDictionary { 591 return &doubleDictionary{ 592 doublePage: doublePage{ 593 typ: typ, 594 values: unsafecast.BytesToFloat64(values)[:numValues], 595 columnIndex: ^columnIndex, 596 }, 597 } 598 } 599 600 func (d *doubleDictionary) Type() Type { return newIndexedType(d.typ, d) } 601 602 func (d *doubleDictionary) Len() int { return len(d.values) } 603 604 func (d *doubleDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 605 606 func (d *doubleDictionary) index(i int32) float64 { return d.values[i] } 607 608 func (d *doubleDictionary) Insert(indexes []int32, values []Value) { 609 model := Value{} 610 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 611 } 612 613 func (d *doubleDictionary) init(indexes []int32) { 614 d.table = hashprobe.NewFloat64Table(cap(d.values), hashprobeTableMaxLoad) 615 616 n := min(len(d.values), len(indexes)) 617 618 for i := 0; i < len(d.values); i += n { 619 j := min(i+n, len(d.values)) 620 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 621 } 622 } 623 624 func (d *doubleDictionary) insert(indexes []int32, rows sparse.Array) { 625 const chunkSize = insertsTargetCacheFootprint / 8 626 627 if d.table == nil { 628 d.init(indexes) 629 } 630 631 values := rows.Float64Array() 632 633 for i := 0; i < values.Len(); i 
+= chunkSize { 634 j := min(i+chunkSize, values.Len()) 635 636 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 637 for k, index := range indexes[i:j] { 638 if index == int32(len(d.values)) { 639 d.values = append(d.values, values.Index(i+k)) 640 } 641 } 642 } 643 } 644 } 645 646 func (d *doubleDictionary) Lookup(indexes []int32, values []Value) { 647 model := d.makeValue(0) 648 memsetValues(values, model) 649 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 650 } 651 652 func (d *doubleDictionary) Bounds(indexes []int32) (min, max Value) { 653 if len(indexes) > 0 { 654 minValue, maxValue := d.bounds(indexes) 655 min = d.makeValue(minValue) 656 max = d.makeValue(maxValue) 657 } 658 return min, max 659 } 660 661 func (d *doubleDictionary) Reset() { 662 d.values = d.values[:0] 663 if d.table != nil { 664 d.table.Reset() 665 } 666 } 667 668 func (d *doubleDictionary) Page() BufferedPage { 669 return &d.doublePage 670 } 671 672 type byteArrayDictionary struct { 673 byteArrayPage 674 offsets []uint32 675 hashmap map[string]int32 676 } 677 678 func newByteArrayDictionary(typ Type, columnIndex int16, numValues int32, values []byte) *byteArrayDictionary { 679 d := &byteArrayDictionary{ 680 offsets: make([]uint32, 0, numValues), 681 byteArrayPage: byteArrayPage{ 682 typ: typ, 683 values: values, 684 numValues: numValues, 685 columnIndex: ^columnIndex, 686 }, 687 } 688 689 for i := 0; i < len(values); { 690 n := plain.ByteArrayLength(values[i:]) 691 d.offsets = append(d.offsets, uint32(i)) 692 i += plain.ByteArrayLengthSize 693 i += n 694 } 695 696 return d 697 } 698 699 func (d *byteArrayDictionary) Type() Type { return newIndexedType(d.typ, d) } 700 701 func (d *byteArrayDictionary) Len() int { return len(d.offsets) } 702 703 func (d *byteArrayDictionary) Index(i int32) Value { return d.makeValueBytes(d.index(i)) } 704 705 func (d *byteArrayDictionary) index(i int32) []byte { return d.valueAt(d.offsets[i]) } 706 707 func (d 
*byteArrayDictionary) Insert(indexes []int32, values []Value) { 708 model := Value{} 709 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) 710 } 711 712 func (d *byteArrayDictionary) insert(indexes []int32, rows sparse.Array) { 713 _ = indexes[:rows.Len()] 714 715 if d.hashmap == nil { 716 d.hashmap = make(map[string]int32, cap(d.offsets)) 717 for index, offset := range d.offsets { 718 d.hashmap[string(d.valueAt(offset))] = int32(index) 719 } 720 } 721 722 for i := 0; i < rows.Len(); i++ { 723 value := *(*string)(rows.Index(i)) 724 725 index, exists := d.hashmap[value] 726 if !exists { 727 index = int32(len(d.offsets)) 728 value = d.append(value) 729 d.hashmap[value] = index 730 } 731 732 indexes[i] = index 733 } 734 } 735 736 func (d *byteArrayDictionary) append(value string) string { 737 offset := len(d.values) 738 d.values = plain.AppendByteArrayString(d.values, value) 739 d.offsets = append(d.offsets, uint32(offset)) 740 d.numValues++ 741 return string(d.values[offset+plain.ByteArrayLengthSize : len(d.values)]) 742 } 743 744 func (d *byteArrayDictionary) Lookup(indexes []int32, values []Value) { 745 model := d.makeValueString("") 746 memsetValues(values, model) 747 d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) 748 } 749 750 func (d *byteArrayDictionary) Bounds(indexes []int32) (min, max Value) { 751 if len(indexes) > 0 { 752 base := d.index(indexes[0]) 753 minValue := unsafecast.BytesToString(base) 754 maxValue := minValue 755 values := [64]string{} 756 757 for i := 1; i < len(indexes); i += len(values) { 758 n := len(indexes) - i 759 if n > len(values) { 760 n = len(values) 761 } 762 j := i + n 763 d.lookupString(indexes[i:j:j], makeArrayString(values[:n:n])) 764 765 for _, value := range values[:n:n] { 766 switch { 767 case value < minValue: 768 minValue = value 769 case value > maxValue: 770 maxValue = value 771 } 772 } 773 } 774 775 min = d.makeValueString(minValue) 776 max = d.makeValueString(maxValue) 777 
} 778 return min, max 779 } 780 781 func (d *byteArrayDictionary) Reset() { 782 d.offsets = d.offsets[:0] 783 d.values = d.values[:0] 784 d.numValues = 0 785 d.hashmap = nil 786 } 787 788 func (d *byteArrayDictionary) Page() BufferedPage { 789 return &d.byteArrayPage 790 } 791 792 type fixedLenByteArrayDictionary struct { 793 fixedLenByteArrayPage 794 hashmap map[string]int32 795 } 796 797 func newFixedLenByteArrayDictionary(typ Type, columnIndex int16, numValues int32, data []byte) *fixedLenByteArrayDictionary { 798 size := typ.Length() 799 return &fixedLenByteArrayDictionary{ 800 fixedLenByteArrayPage: fixedLenByteArrayPage{ 801 typ: typ, 802 size: size, 803 data: data, 804 columnIndex: ^columnIndex, 805 }, 806 } 807 } 808 809 func (d *fixedLenByteArrayDictionary) Type() Type { return newIndexedType(d.typ, d) } 810 811 func (d *fixedLenByteArrayDictionary) Len() int { return len(d.data) / d.size } 812 813 func (d *fixedLenByteArrayDictionary) Index(i int32) Value { 814 return d.makeValueBytes(d.index(i)) 815 } 816 817 func (d *fixedLenByteArrayDictionary) index(i int32) []byte { 818 j := (int(i) + 0) * d.size 819 k := (int(i) + 1) * d.size 820 return d.data[j:k:k] 821 } 822 823 func (d *fixedLenByteArrayDictionary) Insert(indexes []int32, values []Value) { 824 d.insertValues(indexes, len(values), func(i int) *byte { 825 return values[i].ptr 826 }) 827 } 828 829 func (d *fixedLenByteArrayDictionary) insert(indexes []int32, rows sparse.Array) { 830 d.insertValues(indexes, rows.Len(), func(i int) *byte { 831 return (*byte)(rows.Index(i)) 832 }) 833 } 834 835 func (d *fixedLenByteArrayDictionary) insertValues(indexes []int32, count int, valueAt func(int) *byte) { 836 _ = indexes[:count] 837 838 if d.hashmap == nil { 839 d.hashmap = make(map[string]int32, cap(d.data)/d.size) 840 for i, j := 0, int32(0); i < len(d.data); i += d.size { 841 d.hashmap[string(d.data[i:i+d.size])] = j 842 j++ 843 } 844 } 845 846 for i := 0; i < count; i++ { 847 value := 
unsafe.Slice(valueAt(i), d.size) 848 849 index, exists := d.hashmap[string(value)] 850 if !exists { 851 index = int32(d.Len()) 852 start := len(d.data) 853 d.data = append(d.data, value...) 854 d.hashmap[string(d.data[start:])] = index 855 } 856 857 indexes[i] = index 858 } 859 } 860 861 func (d *fixedLenByteArrayDictionary) Lookup(indexes []int32, values []Value) { 862 model := d.makeValueString("") 863 memsetValues(values, model) 864 d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) 865 } 866 867 func (d *fixedLenByteArrayDictionary) Bounds(indexes []int32) (min, max Value) { 868 if len(indexes) > 0 { 869 base := d.index(indexes[0]) 870 minValue := unsafecast.BytesToString(base) 871 maxValue := minValue 872 values := [64]string{} 873 874 for i := 1; i < len(indexes); i += len(values) { 875 n := len(indexes) - i 876 if n > len(values) { 877 n = len(values) 878 } 879 j := i + n 880 d.lookupString(indexes[i:j:j], makeArrayString(values[:n:n])) 881 882 for _, value := range values[:n:n] { 883 switch { 884 case value < minValue: 885 minValue = value 886 case value > maxValue: 887 maxValue = value 888 } 889 } 890 } 891 892 min = d.makeValueString(minValue) 893 max = d.makeValueString(maxValue) 894 } 895 return min, max 896 } 897 898 func (d *fixedLenByteArrayDictionary) Reset() { 899 d.data = d.data[:0] 900 d.hashmap = nil 901 } 902 903 func (d *fixedLenByteArrayDictionary) Page() BufferedPage { 904 return &d.fixedLenByteArrayPage 905 } 906 907 type uint32Dictionary struct { 908 uint32Page 909 table *hashprobe.Uint32Table 910 } 911 912 func newUint32Dictionary(typ Type, columnIndex int16, numValues int32, data []byte) *uint32Dictionary { 913 return &uint32Dictionary{ 914 uint32Page: uint32Page{ 915 typ: typ, 916 values: unsafecast.BytesToUint32(data)[:numValues], 917 columnIndex: ^columnIndex, 918 }, 919 } 920 } 921 922 func (d *uint32Dictionary) Type() Type { return newIndexedType(d.typ, d) } 923 924 func (d *uint32Dictionary) Len() int { 
return len(d.values) } 925 926 func (d *uint32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 927 928 func (d *uint32Dictionary) index(i int32) uint32 { return d.values[i] } 929 930 func (d *uint32Dictionary) Insert(indexes []int32, values []Value) { 931 model := Value{} 932 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 933 } 934 935 func (d *uint32Dictionary) init(indexes []int32) { 936 d.table = hashprobe.NewUint32Table(cap(d.values), hashprobeTableMaxLoad) 937 938 n := min(len(d.values), len(indexes)) 939 940 for i := 0; i < len(d.values); i += n { 941 j := min(i+n, len(d.values)) 942 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 943 } 944 } 945 946 func (d *uint32Dictionary) insert(indexes []int32, rows sparse.Array) { 947 const chunkSize = insertsTargetCacheFootprint / 4 948 949 if d.table == nil { 950 d.init(indexes) 951 } 952 953 values := rows.Uint32Array() 954 955 for i := 0; i < values.Len(); i += chunkSize { 956 j := min(i+chunkSize, values.Len()) 957 958 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 959 for k, index := range indexes[i:j] { 960 if index == int32(len(d.values)) { 961 d.values = append(d.values, values.Index(i+k)) 962 } 963 } 964 } 965 } 966 } 967 968 func (d *uint32Dictionary) Lookup(indexes []int32, values []Value) { 969 model := d.makeValue(0) 970 memsetValues(values, model) 971 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 972 } 973 974 func (d *uint32Dictionary) Bounds(indexes []int32) (min, max Value) { 975 if len(indexes) > 0 { 976 minValue, maxValue := d.bounds(indexes) 977 min = d.makeValue(minValue) 978 max = d.makeValue(maxValue) 979 } 980 return min, max 981 } 982 983 func (d *uint32Dictionary) Reset() { 984 d.values = d.values[:0] 985 if d.table != nil { 986 d.table.Reset() 987 } 988 } 989 990 func (d *uint32Dictionary) Page() BufferedPage { 991 return &d.uint32Page 992 } 993 994 type uint64Dictionary struct { 995 uint64Page 996 table 
*hashprobe.Uint64Table
}

// newUint64Dictionary constructs a dictionary whose initial values are the
// uint64 view of data. The column index is stored as its bitwise complement.
// The numValues argument is not used by this constructor, and the probing
// table is left nil until the first insert.
func newUint64Dictionary(typ Type, columnIndex int16, numValues int32, data []byte) *uint64Dictionary {
	return &uint64Dictionary{
		uint64Page: uint64Page{
			typ:         typ,
			values:      unsafecast.BytesToUint64(data),
			columnIndex: ^columnIndex,
		},
	}
}

// Type returns an indexed type wrapping the dictionary's type and referencing
// the dictionary itself.
func (d *uint64Dictionary) Type() Type { return newIndexedType(d.typ, d) }

// Len returns the number of values held by the dictionary.
func (d *uint64Dictionary) Len() int { return len(d.values) }

// Index returns the dictionary value at index i wrapped in a Value.
func (d *uint64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) }

// index returns the raw uint64 at index i; it panics if i is out of range.
func (d *uint64Dictionary) index(i int32) uint64 { return d.values[i] }

// Insert adds values to the dictionary and writes the index of each value to
// indexes. The values are handed to insert as an array addressed at the
// offset of the u64 field of Value.
func (d *uint64Dictionary) Insert(indexes []int32, values []Value) {
	model := Value{}
	d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
}

// init creates the probing table and feeds it the values already present in
// the dictionary. The indexes slice is only used as scratch space to receive
// the probe results; its content on return is unspecified.
func (d *uint64Dictionary) init(indexes []int32) {
	d.table = hashprobe.NewUint64Table(cap(d.values), hashprobeTableMaxLoad)

	if len(d.values) == 0 {
		return
	}

	// The chunk size below is derived from len(indexes). With an empty slice
	// it would be zero and the loop would never advance, so fall back to a
	// temporary scratch buffer in that case.
	if len(indexes) == 0 {
		indexes = make([]int32, min(len(d.values), insertsTargetCacheFootprint/8))
	}

	n := min(len(d.values), len(indexes))

	for i := 0; i < len(d.values); i += n {
		j := min(i+n, len(d.values))
		d.table.Probe(d.values[i:j:j], indexes[:n:n])
	}
}

// insert probes the table with the uint64 view of rows, in chunks sized from
// insertsTargetCacheFootprint, and writes the resulting indexes to indexes.
//
// The method panics if indexes is shorter than rows.
func (d *uint64Dictionary) insert(indexes []int32, rows sparse.Array) {
	const chunkSize = insertsTargetCacheFootprint / 8

	if d.table == nil {
		d.init(indexes)
	}

	values := rows.Uint64Array()

	for i := 0; i < values.Len(); i += chunkSize {
		j := min(i+chunkSize, values.Len())

		// A positive result triggers a scan of the chunk: every index equal
		// to the current dictionary length identifies a value that has to be
		// appended (presumably the table hands out indexes sequentially to
		// keys it had not seen before).
		if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 {
			for k, index := range indexes[i:j] {
				if index == int32(len(d.values)) {
					d.values = append(d.values, values.Index(i+k))
				}
			}
		}
	}
}

// Lookup fills values with a model Value built by makeValue and then
// delegates to lookup to write the dictionary values found at indexes into
// the u64 field of each element.
func (d *uint64Dictionary) Lookup(indexes []int32, values []Value) {
	model := d.makeValue(0)
	memsetValues(values, model)
	d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64)))
}

// Bounds returns the min and max values referenced by indexes. Both results
// are zero Values when indexes is empty.
func (d *uint64Dictionary) Bounds(indexes []int32) (min, max Value) {
	if len(indexes) > 0 {
		minValue, maxValue := d.bounds(indexes)
		min = d.makeValue(minValue)
		max = d.makeValue(maxValue)
	}
	return min, max
}

// Reset truncates the values to length zero and, if the probing table has
// been created, resets it as well.
func (d *uint64Dictionary) Reset() {
	d.values = d.values[:0]
	if d.table != nil {
		d.table.Reset()
	}
}

// Page returns the embedded uint64Page, which shares memory with the
// dictionary.
func (d *uint64Dictionary) Page() BufferedPage {
	return &d.uint64Page
}

// be128Dictionary is a dictionary of 16 byte values backed by a be128Page and
// a lazily created 128 bits probing table.
type be128Dictionary struct {
	be128Page
	table *hashprobe.Uint128Table
}

// newBE128Dictionary constructs a dictionary whose initial values are the
// 128 bits view of data. The column index is stored as its bitwise
// complement. The numValues argument is not used by this constructor, and the
// probing table is left nil until the first insert.
func newBE128Dictionary(typ Type, columnIndex int16, numValues int32, data []byte) *be128Dictionary {
	return &be128Dictionary{
		be128Page: be128Page{
			typ:         typ,
			values:      unsafecast.BytesToUint128(data),
			columnIndex: ^columnIndex,
		},
	}
}

// Type returns an indexed type wrapping the dictionary's type and referencing
// the dictionary itself.
func (d *be128Dictionary) Type() Type { return newIndexedType(d.typ, d) }

// Len returns the number of values held by the dictionary.
func (d *be128Dictionary) Len() int { return len(d.values) }

// Index returns the dictionary value at index i wrapped in a Value.
func (d *be128Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) }

// index returns a pointer to the 16 bytes stored at index i; it panics if i
// is out of range.
func (d *be128Dictionary) index(i int32) *[16]byte { return &d.values[i] }

// Insert adds values to the dictionary and writes the index of each value to
// indexes.
//
// The method panics if indexes is shorter than values, or if one of the
// values is not a fixed-length byte array of length 16.
func (d *be128Dictionary) Insert(indexes []int32, values []Value) {
	_ = indexes[:len(values)]

	// Validate every value up front so that nothing is inserted when one of
	// them is invalid.
	for _, v := range values {
		if v.kind != ^int8(FixedLenByteArray) {
			panic("values inserted in BE128 dictionary must be of type FIXED_LEN_BYTE_ARRAY")
		}
		if v.u64 != 16 {
			panic("values inserted in BE128 dictionary must be of length 16")
		}
	}

	if d.table == nil {
		d.init(indexes)
	}

	const chunkSize = insertsTargetCacheFootprint / 16
	var buffer [chunkSize][16]byte

	for i := 0; i < len(values); i += chunkSize {
		j := min(chunkSize+i, len(values))
		n := min(chunkSize, len(values)-i)

		// Gather the keys of this chunk into the local buffer before probing
		// (presumably writePointersBE128 copies the 16 bytes referenced by
		// the ptr field of each value).
		probe := buffer[:n:n]
		writePointersBE128(probe, makeArrayValue(values[i:j], unsafe.Offsetof(values[i].ptr)))

		if d.table.Probe(probe, indexes[i:j:j]) > 0 {
			for k, v := range probe {
				if indexes[i+k] == int32(len(d.values)) {
					d.values = append(d.values, v)
				}
			}
		}
	}
}

// init creates the probing table and feeds it the values already present in
// the dictionary. The indexes slice is only used as scratch space to receive
// the probe results; its content on return is unspecified.
func (d *be128Dictionary) init(indexes []int32) {
	// NOTE(review): this table uses a max load of 0.75 rather than
	// hashprobeTableMaxLoad; looks intentional for 128 bits keys, confirm.
	d.table = hashprobe.NewUint128Table(cap(d.values), 0.75)

	if len(d.values) == 0 {
		return
	}

	// The chunk size below is derived from len(indexes). With an empty slice
	// it would be zero and the loop would never advance, so fall back to a
	// temporary scratch buffer in that case.
	if len(indexes) == 0 {
		indexes = make([]int32, min(len(d.values), insertsTargetCacheFootprint/16))
	}

	n := min(len(d.values), len(indexes))

	for i := 0; i < len(d.values); i += n {
		j := min(i+n, len(d.values))
		d.table.Probe(d.values[i:j:j], indexes[:n:n])
	}
}

// insert probes the table with the 128 bits view of rows, in chunks sized
// from insertsTargetCacheFootprint, and writes the resulting indexes to
// indexes.
//
// The method panics if indexes is shorter than rows.
func (d *be128Dictionary) insert(indexes []int32, rows sparse.Array) {
	const chunkSize = insertsTargetCacheFootprint / 16

	if d.table == nil {
		d.init(indexes)
	}

	values := rows.Uint128Array()

	for i := 0; i < values.Len(); i += chunkSize {
		j := min(i+chunkSize, values.Len())

		// A positive result triggers a scan of the chunk: every index equal
		// to the current dictionary length identifies a value that has to be
		// appended.
		if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 {
			for k, index := range indexes[i:j] {
				if index == int32(len(d.values)) {
					d.values = append(d.values, values.Index(i+k))
				}
			}
		}
	}
}

// Lookup fills values with a model Value built by makeValueString and then
// delegates to lookupString to write the dictionary values found at indexes
// into the ptr field of each element.
func (d *be128Dictionary) Lookup(indexes []int32, values []Value) {
	model := d.makeValueString("")
	memsetValues(values, model)
	d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr)))
}

// Bounds returns the min and max values referenced by indexes. Both results
// are zero Values when indexes is empty.
func (d *be128Dictionary) Bounds(indexes []int32) (min, max Value) {
	if len(indexes) > 0 {
		minValue, maxValue := d.bounds(indexes)
		min = d.makeValue(minValue)
		max = d.makeValue(maxValue)
	}
	return min, max
}

// Reset truncates the values to length zero and, if the probing table has
// been created, resets it as well.
func (d *be128Dictionary) Reset() {
	d.values = d.values[:0]
	if d.table != nil {
		d.table.Reset()
	}
}

// Page returns the embedded be128Page, which shares memory with the
// dictionary.
func (d *be128Dictionary) Page() BufferedPage {
	return &d.be128Page
}

// indexedType is a wrapper around a Type value which overrides object
// constructors to use indexed versions referencing values in the dictionary
// instead of storing plain values.
type indexedType struct {
	Type
	dict Dictionary
}

// newIndexedType returns an indexedType wrapping typ and referencing dict.
func newIndexedType(typ Type, dict Dictionary) *indexedType {
	return &indexedType{Type: typ, dict: dict}
}

// NewColumnBuffer returns a column buffer which stores dictionary indexes
// rather than plain values.
func (t *indexedType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newIndexedColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

// NewPage returns a page which interprets data as dictionary indexes rather
// than plain values.
func (t *indexedType) NewPage(columnIndex, numValues int, data []byte) Page {
	return newIndexedPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// indexedPage is an implementation of the BufferedPage interface which stores
// indexes instead of plain values. The indexes reference the values in a
// dictionary that the page was created for.
type indexedPage struct {
	typ         *indexedType
	values      []int32
	columnIndex int16
}

// newIndexedPage constructs a page of numValues dictionary indexes from the
// bytes in values. The column index is stored as its bitwise complement.
func newIndexedPage(typ *indexedType, columnIndex int16, numValues int32, values []byte) *indexedPage {
	// RLE encoded values that contain dictionary indexes in data pages are
	// sometimes truncated when they contain only zeros. We account for this
	// special case here and extend the values buffer if it is shorter than
	// needed to hold `numValues`.
	size := 4 * int(numValues)

	if len(values) < size {
		if cap(values) < size {
			tmp := make([]byte, size)
			copy(tmp, values)
			values = tmp
		} else {
			// Zero only the missing bytes, from the current length up to
			// size. Slicing further than size could exceed the capacity of
			// the buffer, or overwrite bytes which do not belong to the page.
			tail := values[len(values):size]
			for i := range tail {
				tail[i] = 0
			}
		}
	}

	return &indexedPage{
		typ:         typ,
		values:      unsafecast.BytesToInt32(values[:size]),
		columnIndex: ^columnIndex,
	}
}

// Type returns an adapter of the page's indexed type which encodes and
// decodes the page content as 32 bits integers.
func (page *indexedPage) Type() Type { return indexedPageType{page.typ} }

// Column returns the column index, undoing the complement applied when the
// page was constructed.
func (page *indexedPage) Column() int { return int(^page.columnIndex) }

// Dictionary returns the dictionary that the page indexes refer to.
func (page *indexedPage) Dictionary() Dictionary { return page.typ.dict }

// NumRows returns the number of indexes in the page.
func (page *indexedPage) NumRows() int64 { return int64(len(page.values)) }

// NumValues returns the number of indexes in the page.
func (page *indexedPage) NumValues() int64 { return int64(len(page.values)) }

// NumNulls always returns zero.
func (page *indexedPage) NumNulls() int64 { return 0 }

// Size returns the size of the page in bytes, 4 bytes per index.
func (page *indexedPage) Size() int64 { return 4 * int64(len(page.values)) }

// RepetitionLevels always returns nil.
func (page *indexedPage) RepetitionLevels() []byte { return nil }

// DefinitionLevels always returns nil.
func (page *indexedPage) DefinitionLevels() []byte { return nil }

// Data returns the byte view of the page indexes.
func (page *indexedPage) Data() []byte { return unsafecast.Int32ToBytes(page.values) }

// Values returns a reader positioned at the first value of the page.
func (page *indexedPage) Values() ValueReader { return &indexedPageValues{page: page} }

// Buffer returns the page itself.
func (page *indexedPage) Buffer() BufferedPage { return page }

// Bounds returns the min and max dictionary values referenced by the page,
// tagged with the page's column index. ok is false when the page is empty, in
// which case min and max are zero Values.
func (page *indexedPage) Bounds() (min, max Value, ok bool) {
	if ok = len(page.values) > 0; ok {
		min, max = page.typ.dict.Bounds(page.values)
		min.columnIndex = page.columnIndex
		max.columnIndex = page.columnIndex
	}
	return min, max, ok
}

// Clone returns a page holding a copy of the indexes and sharing the type
// (and therefore the dictionary) of the receiver.
func (page *indexedPage) Clone() BufferedPage {
	return &indexedPage{
		typ:         page.typ,
		values:      append([]int32{}, page.values...),
		columnIndex: page.columnIndex,
	}
}

// Slice returns a page over the indexes in the range [i:j), sharing the
// underlying memory of the receiver.
func (page *indexedPage) Slice(i, j int64) BufferedPage {
	return &indexedPage{
		typ:         page.typ,
		values:      page.values[i:j],
		columnIndex: page.columnIndex,
	}
}

// indexedPageType is an adapter for the indexedType returned when accessing
// the type of an indexedPage value. It overrides the Encode/Decode methods to
// account for the fact that an indexed page is holding indexes of values into
// its dictionary instead of plain values.
type indexedPageType struct{ *indexedType }

// Encode encodes src as 32 bits integers using enc.
func (t indexedPageType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
	return enc.EncodeInt32(dst, src)
}

// Decode decodes src as 32 bits integers using enc.
func (t indexedPageType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
	return enc.DecodeInt32(dst, src)
}

// indexedPageValues is a ValueReader over the values referenced by an
// indexedPage; offset is the position of the next index to read.
type indexedPageValues struct {
	page   *indexedPage
	offset int
}

// ReadValues looks up the next len(values) indexes of the page in the
// dictionary and writes the results to values. It returns io.EOF along with
// the last values of the page, or with zero values once the page has been
// fully consumed.
func (r *indexedPageValues) ReadValues(values []Value) (n int, err error) {
	if n = len(r.page.values) - r.offset; n == 0 {
		return 0, io.EOF
	}
	if n > len(values) {
		n = len(values)
	}
	r.page.typ.dict.Lookup(r.page.values[r.offset:r.offset+n], values[:n])
	r.offset += n
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

// indexedColumnBuffer is an implementation of the ColumnBuffer interface which
// builds a page of indexes into a parent dictionary when values are written.
1340 type indexedColumnBuffer struct{ indexedPage } 1341 1342 func newIndexedColumnBuffer(typ *indexedType, columnIndex int16, numValues int32) *indexedColumnBuffer { 1343 return &indexedColumnBuffer{ 1344 indexedPage: indexedPage{ 1345 typ: typ, 1346 values: make([]int32, 0, numValues), 1347 columnIndex: ^columnIndex, 1348 }, 1349 } 1350 } 1351 1352 func (col *indexedColumnBuffer) Clone() ColumnBuffer { 1353 return &indexedColumnBuffer{ 1354 indexedPage: indexedPage{ 1355 typ: col.typ, 1356 values: append([]int32{}, col.values...), 1357 columnIndex: col.columnIndex, 1358 }, 1359 } 1360 } 1361 1362 func (col *indexedColumnBuffer) ColumnIndex() ColumnIndex { return indexedColumnIndex{col} } 1363 1364 func (col *indexedColumnBuffer) OffsetIndex() OffsetIndex { return indexedOffsetIndex{col} } 1365 1366 func (col *indexedColumnBuffer) BloomFilter() BloomFilter { return nil } 1367 1368 func (col *indexedColumnBuffer) Dictionary() Dictionary { return col.typ.dict } 1369 1370 func (col *indexedColumnBuffer) Pages() Pages { return onePage(col.Page()) } 1371 1372 func (col *indexedColumnBuffer) Page() BufferedPage { return &col.indexedPage } 1373 1374 func (col *indexedColumnBuffer) Reset() { col.values = col.values[:0] } 1375 1376 func (col *indexedColumnBuffer) Cap() int { return cap(col.values) } 1377 1378 func (col *indexedColumnBuffer) Len() int { return len(col.values) } 1379 1380 func (col *indexedColumnBuffer) Less(i, j int) bool { 1381 u := col.typ.dict.Index(col.values[i]) 1382 v := col.typ.dict.Index(col.values[j]) 1383 return col.typ.Compare(u, v) < 0 1384 } 1385 1386 func (col *indexedColumnBuffer) Swap(i, j int) { 1387 col.values[i], col.values[j] = col.values[j], col.values[i] 1388 } 1389 1390 func (col *indexedColumnBuffer) WriteValues(values []Value) (int, error) { 1391 i := len(col.values) 1392 j := len(col.values) + len(values) 1393 1394 if j <= cap(col.values) { 1395 col.values = col.values[:j] 1396 } else { 1397 tmp := make([]int32, j, 2*j) 1398 
copy(tmp, col.values) 1399 col.values = tmp 1400 } 1401 1402 col.typ.dict.Insert(col.values[i:], values) 1403 return len(values), nil 1404 } 1405 1406 func (col *indexedColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 1407 i := len(col.values) 1408 j := len(col.values) + rows.Len() 1409 1410 if j <= cap(col.values) { 1411 col.values = col.values[:j] 1412 } else { 1413 tmp := make([]int32, j, 2*j) 1414 copy(tmp, col.values) 1415 col.values = tmp 1416 } 1417 1418 col.typ.dict.insert(col.values[i:], rows) 1419 } 1420 1421 func (col *indexedColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 1422 i := int(offset) 1423 switch { 1424 case i < 0: 1425 return 0, errRowIndexOutOfBounds(offset, int64(len(col.values))) 1426 case i >= len(col.values): 1427 return 0, io.EOF 1428 default: 1429 for n < len(values) && i < len(col.values) { 1430 values[n] = col.typ.dict.Index(col.values[i]) 1431 values[n].columnIndex = col.columnIndex 1432 n++ 1433 i++ 1434 } 1435 if n < len(values) { 1436 err = io.EOF 1437 } 1438 return n, err 1439 } 1440 } 1441 1442 func (col *indexedColumnBuffer) ReadRowAt(row Row, index int64) (Row, error) { 1443 switch { 1444 case index < 0: 1445 return row, errRowIndexOutOfBounds(index, int64(len(col.values))) 1446 case index >= int64(len(col.values)): 1447 return row, io.EOF 1448 default: 1449 v := col.typ.dict.Index(col.values[index]) 1450 v.columnIndex = col.columnIndex 1451 return append(row, v), nil 1452 } 1453 } 1454 1455 type indexedColumnIndex struct{ col *indexedColumnBuffer } 1456 1457 func (index indexedColumnIndex) NumPages() int { return 1 } 1458 func (index indexedColumnIndex) NullCount(int) int64 { return 0 } 1459 func (index indexedColumnIndex) NullPage(int) bool { return false } 1460 func (index indexedColumnIndex) MinValue(int) Value { 1461 min, _, _ := index.col.Bounds() 1462 return min 1463 } 1464 func (index indexedColumnIndex) MaxValue(int) Value { 1465 _, max, _ := index.col.Bounds() 1466 return 
max 1467 } 1468 func (index indexedColumnIndex) IsAscending() bool { 1469 min, max, _ := index.col.Bounds() 1470 return index.col.typ.Compare(min, max) <= 0 1471 } 1472 func (index indexedColumnIndex) IsDescending() bool { 1473 min, max, _ := index.col.Bounds() 1474 return index.col.typ.Compare(min, max) > 0 1475 } 1476 1477 type indexedOffsetIndex struct{ col *indexedColumnBuffer } 1478 1479 func (index indexedOffsetIndex) NumPages() int { return 1 } 1480 func (index indexedOffsetIndex) Offset(int) int64 { return 0 } 1481 func (index indexedOffsetIndex) CompressedPageSize(int) int64 { return index.col.Size() } 1482 func (index indexedOffsetIndex) FirstRowIndex(int) int64 { return 0 }