github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/dictionary.go (about) 1 package parquet 2 3 import ( 4 "io" 5 "math/bits" 6 "unsafe" 7 8 "github.com/segmentio/parquet-go/deprecated" 9 "github.com/segmentio/parquet-go/encoding" 10 "github.com/segmentio/parquet-go/encoding/plain" 11 "github.com/segmentio/parquet-go/hashprobe" 12 "github.com/segmentio/parquet-go/internal/bitpack" 13 "github.com/segmentio/parquet-go/internal/unsafecast" 14 "github.com/segmentio/parquet-go/sparse" 15 ) 16 17 const ( 18 // Maximum load of probing tables. This parameter configures the balance 19 // between memory density and compute time of probing operations. Valid 20 // values are floating point numbers between 0 and 1. 21 // 22 // Smaller values result in lower collision probability when inserting 23 // values in probing tables, but also increase memory utilization. 24 // 25 // TODO: make this configurable by the application? 26 hashprobeTableMaxLoad = 0.85 27 28 // An estimate of the CPU cache footprint used by insert operations. 29 // 30 // This constant is used to determine a useful chunk size depending on the 31 // size of values being inserted in dictionaries. More values of small size 32 // can fit in CPU caches, so the inserts can operation on larger chunks. 33 insertsTargetCacheFootprint = 8192 34 ) 35 36 // The Dictionary interface represents type-specific implementations of parquet 37 // dictionaries. 38 // 39 // Programs can instantiate dictionaries by call the NewDictionary method of a 40 // Type object. 41 // 42 // The current implementation has a limitation which prevents applications from 43 // providing custom versions of this interface because it contains unexported 44 // methods. The only way to create Dictionary values is to call the 45 // NewDictionary of Type instances. This limitation may be lifted in future 46 // releases. 47 type Dictionary interface { 48 // Returns the type that the dictionary was created from. 49 Type() Type 50 51 // Returns the number of value indexed in the dictionary. 52 Len() int 53 54 // Returns the dictionary value at the given index. 55 Index(index int32) Value 56 57 // Inserts values from the second slice to the dictionary and writes the 58 // indexes at which each value was inserted to the first slice. 59 // 60 // The method panics if the length of the indexes slice is smaller than the 61 // length of the values slice. 62 Insert(indexes []int32, values []Value) 63 64 // Given an array of dictionary indexes, lookup the values into the array 65 // of values passed as second argument. 66 // 67 // The method panics if len(indexes) > len(values), or one of the indexes 68 // is negative or greater than the highest index in the dictionary. 69 Lookup(indexes []int32, values []Value) 70 71 // Returns the min and max values found in the given indexes. 72 Bounds(indexes []int32) (min, max Value) 73 74 // Resets the dictionary to its initial state, removing all values. 75 Reset() 76 77 // Returns a Page representing the content of the dictionary. 78 // 79 // The returned page shares the underlying memory of the buffer, it remains 80 // valid to use until the dictionary's Reset method is called. 81 Page() Page 82 83 // See ColumnBuffer.writeValues for details on the use of unexported methods 84 // on interfaces. 85 insert(indexes []int32, rows sparse.Array) 86 //lookup(indexes []int32, rows sparse.Array) 87 } 88 89 func checkLookupIndexBounds(indexes []int32, rows sparse.Array) { 90 if rows.Len() < len(indexes) { 91 panic("dictionary lookup with more indexes than values") 92 } 93 } 94 95 // The boolean dictionary always contains two values for true and false. 96 type booleanDictionary struct { 97 booleanPage 98 // There are only two possible values for booleans, false and true. 99 // Rather than using a Go map, we track the indexes of each values 100 // in an array of two 32 bits integers. When inserting values in the 101 // dictionary, we ensure that an index exist for each boolean value, 102 // then use the value 0 or 1 (false or true) to perform a lookup in 103 // the dictionary's map. 104 table [2]int32 105 } 106 107 func newBooleanDictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *booleanDictionary { 108 indexOfFalse, indexOfTrue, values := int32(-1), int32(-1), data.Boolean() 109 110 for i := int32(0); i < numValues && indexOfFalse < 0 && indexOfTrue < 0; i += 8 { 111 v := values[i] 112 if v != 0x00 { 113 indexOfTrue = i + int32(bits.TrailingZeros8(v)) 114 } 115 if v != 0xFF { 116 indexOfFalse = i + int32(bits.TrailingZeros8(^v)) 117 } 118 } 119 120 return &booleanDictionary{ 121 booleanPage: booleanPage{ 122 typ: typ, 123 bits: values[:bitpack.ByteCount(uint(numValues))], 124 numValues: numValues, 125 columnIndex: ^columnIndex, 126 }, 127 table: [2]int32{ 128 0: indexOfFalse, 129 1: indexOfTrue, 130 }, 131 } 132 } 133 134 func (d *booleanDictionary) Type() Type { return newIndexedType(d.typ, d) } 135 136 func (d *booleanDictionary) Len() int { return int(d.numValues) } 137 138 func (d *booleanDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 139 140 func (d *booleanDictionary) index(i int32) bool { return d.valueAt(int(i)) } 141 142 func (d *booleanDictionary) Insert(indexes []int32, values []Value) { 143 model := Value{} 144 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 145 } 146 147 func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) { 148 _ = indexes[:rows.Len()] 149 150 if d.table[0] < 0 { 151 d.table[0] = d.numValues 152 d.numValues++ 153 d.bits = plain.AppendBoolean(d.bits, int(d.table[0]), false) 154 } 155 156 if d.table[1] < 0 { 157 d.table[1] = d.numValues 158 d.numValues++ 159 d.bits = plain.AppendBoolean(d.bits, int(d.table[1]), true) 160 } 161 162 values := rows.Uint8Array() 163 dict := d.table 164 165 for i := 0; i < rows.Len(); i++ { 166 v := values.Index(i) & 1 167 indexes[i] = dict[v] 168 } 169 } 170 171 func (d *booleanDictionary) Lookup(indexes []int32, values []Value) { 172 model := d.makeValue(false) 173 memsetValues(values, model) 174 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 175 } 176 177 func (d *booleanDictionary) lookup(indexes []int32, rows sparse.Array) { 178 checkLookupIndexBounds(indexes, rows) 179 for i, j := range indexes { 180 *(*bool)(rows.Index(i)) = d.index(j) 181 } 182 } 183 184 func (d *booleanDictionary) Bounds(indexes []int32) (min, max Value) { 185 if len(indexes) > 0 { 186 hasFalse, hasTrue := false, false 187 188 for _, i := range indexes { 189 v := d.index(i) 190 if v { 191 hasTrue = true 192 } else { 193 hasFalse = true 194 } 195 if hasTrue && hasFalse { 196 break 197 } 198 } 199 200 min = d.makeValue(!hasFalse) 201 max = d.makeValue(hasTrue) 202 } 203 return min, max 204 } 205 206 func (d *booleanDictionary) Reset() { 207 d.bits = d.bits[:0] 208 d.offset = 0 209 d.numValues = 0 210 d.table = [2]int32{-1, -1} 211 } 212 213 func (d *booleanDictionary) Page() Page { 214 return &d.booleanPage 215 } 216 217 type int32Dictionary struct { 218 int32Page 219 table *hashprobe.Int32Table 220 } 221 222 func newInt32Dictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *int32Dictionary { 223 return &int32Dictionary{ 224 int32Page: int32Page{ 225 typ: typ, 226 values: data.Int32()[:numValues], 227 columnIndex: ^columnIndex, 228 }, 229 } 230 } 231 232 func (d *int32Dictionary) Type() Type { return newIndexedType(d.typ, d) } 233 234 func (d *int32Dictionary) Len() int { return len(d.values) } 235 236 func (d *int32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 237 238 func (d *int32Dictionary) index(i int32) int32 { return d.values[i] } 239 240 func (d *int32Dictionary) Insert(indexes []int32, values []Value) { 241 model := Value{} 242 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 243 } 244 245 func (d *int32Dictionary) init(indexes []int32) { 246 d.table = hashprobe.NewInt32Table(len(d.values), hashprobeTableMaxLoad) 247 248 n := min(len(d.values), len(indexes)) 249 250 for i := 0; i < len(d.values); i += n { 251 j := min(i+n, len(d.values)) 252 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 253 } 254 } 255 256 func (d *int32Dictionary) insert(indexes []int32, rows sparse.Array) { 257 // Iterating over the input in chunks helps keep relevant data in CPU 258 // caches when a large number of values are inserted into the dictionary with 259 // a single method call. 260 // 261 // Without this chunking, memory areas from the head of the indexes and 262 // values arrays end up being evicted from CPU caches as the probing 263 // operation iterates through the array. The subsequent scan of the indexes 264 // required to determine which values must be inserted into the page then 265 // stalls on retrieving data from main memory. 266 // 267 // We measured as much as ~37% drop in throughput when disabling the 268 // chunking, and did not observe any penalties from having it on smaller 269 // inserts. 270 const chunkSize = insertsTargetCacheFootprint / 4 271 272 if d.table == nil { 273 d.init(indexes) 274 } 275 276 values := rows.Int32Array() 277 278 for i := 0; i < values.Len(); i += chunkSize { 279 j := min(i+chunkSize, values.Len()) 280 281 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 282 for k, index := range indexes[i:j] { 283 if index == int32(len(d.values)) { 284 d.values = append(d.values, values.Index(i+k)) 285 } 286 } 287 } 288 } 289 } 290 291 func (d *int32Dictionary) Lookup(indexes []int32, values []Value) { 292 model := d.makeValue(0) 293 memsetValues(values, model) 294 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 295 } 296 297 func (d *int32Dictionary) Bounds(indexes []int32) (min, max Value) { 298 if len(indexes) > 0 { 299 minValue, maxValue := d.bounds(indexes) 300 min = d.makeValue(minValue) 301 max = d.makeValue(maxValue) 302 } 303 return min, max 304 } 305 306 func (d *int32Dictionary) Reset() { 307 d.values = d.values[:0] 308 if d.table != nil { 309 d.table.Reset() 310 } 311 } 312 313 func (d *int32Dictionary) Page() Page { 314 return &d.int32Page 315 } 316 317 type int64Dictionary struct { 318 int64Page 319 table *hashprobe.Int64Table 320 } 321 322 func newInt64Dictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *int64Dictionary { 323 return &int64Dictionary{ 324 int64Page: int64Page{ 325 typ: typ, 326 values: data.Int64()[:numValues], 327 columnIndex: ^columnIndex, 328 }, 329 } 330 } 331 332 func (d *int64Dictionary) Type() Type { return newIndexedType(d.typ, d) } 333 334 func (d *int64Dictionary) Len() int { return len(d.values) } 335 336 func (d *int64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 337 338 func (d *int64Dictionary) index(i int32) int64 { return d.values[i] } 339 340 func (d *int64Dictionary) Insert(indexes []int32, values []Value) { 341 model := Value{} 342 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 343 } 344 345 func (d *int64Dictionary) init(indexes []int32) { 346 d.table = hashprobe.NewInt64Table(len(d.values), hashprobeTableMaxLoad) 347 348 n := min(len(d.values), len(indexes)) 349 350 for i := 0; i < len(d.values); i += n { 351 j := min(i+n, len(d.values)) 352 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 353 } 354 } 355 356 func (d *int64Dictionary) insert(indexes []int32, rows sparse.Array) { 357 const chunkSize = insertsTargetCacheFootprint / 8 358 359 if d.table == nil { 360 d.init(indexes) 361 } 362 363 values := rows.Int64Array() 364 365 for i := 0; i < values.Len(); i += chunkSize { 366 j := min(i+chunkSize, values.Len()) 367 368 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 369 for k, index := range indexes[i:j] { 370 if index == int32(len(d.values)) { 371 d.values = append(d.values, values.Index(i+k)) 372 } 373 } 374 } 375 } 376 } 377 378 func (d *int64Dictionary) Lookup(indexes []int32, values []Value) { 379 model := d.makeValue(0) 380 memsetValues(values, model) 381 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 382 } 383 384 func (d *int64Dictionary) Bounds(indexes []int32) (min, max Value) { 385 if len(indexes) > 0 { 386 minValue, maxValue := d.bounds(indexes) 387 min = d.makeValue(minValue) 388 max = d.makeValue(maxValue) 389 } 390 return min, max 391 } 392 393 func (d *int64Dictionary) Reset() { 394 d.values = d.values[:0] 395 if d.table != nil { 396 d.table.Reset() 397 } 398 } 399 400 func (d *int64Dictionary) Page() Page { 401 return &d.int64Page 402 } 403 404 type int96Dictionary struct { 405 int96Page 406 hashmap map[deprecated.Int96]int32 407 } 408 409 func newInt96Dictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *int96Dictionary { 410 return &int96Dictionary{ 411 int96Page: int96Page{ 412 typ: typ, 413 values: data.Int96()[:numValues], 414 columnIndex: ^columnIndex, 415 }, 416 } 417 } 418 419 func (d *int96Dictionary) Type() Type { return newIndexedType(d.typ, d) } 420 421 func (d *int96Dictionary) Len() int { return len(d.values) } 422 423 func (d *int96Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 424 425 func (d *int96Dictionary) index(i int32) deprecated.Int96 { return d.values[i] } 426 427 func (d *int96Dictionary) Insert(indexes []int32, values []Value) { 428 d.insertValues(indexes, len(values), func(i int) deprecated.Int96 { 429 return values[i].Int96() 430 }) 431 } 432 433 func (d *int96Dictionary) insert(indexes []int32, rows sparse.Array) { 434 d.insertValues(indexes, rows.Len(), func(i int) deprecated.Int96 { 435 return *(*deprecated.Int96)(rows.Index(i)) 436 }) 437 } 438 439 func (d *int96Dictionary) insertValues(indexes []int32, count int, valueAt func(int) deprecated.Int96) { 440 _ = indexes[:count] 441 442 if d.hashmap == nil { 443 d.hashmap = make(map[deprecated.Int96]int32, len(d.values)) 444 for i, v := range d.values { 445 d.hashmap[v] = int32(i) 446 } 447 } 448 449 for i := 0; i < count; i++ { 450 value := valueAt(i) 451 452 index, exists := d.hashmap[value] 453 if !exists { 454 index = int32(len(d.values)) 455 d.values = append(d.values, value) 456 d.hashmap[value] = index 457 } 458 459 indexes[i] = index 460 } 461 } 462 463 func (d *int96Dictionary) Lookup(indexes []int32, values []Value) { 464 for i, j := range indexes { 465 values[i] = d.Index(j) 466 } 467 } 468 469 func (d *int96Dictionary) Bounds(indexes []int32) (min, max Value) { 470 if len(indexes) > 0 { 471 minValue := d.index(indexes[0]) 472 maxValue := minValue 473 474 for _, i := range indexes[1:] { 475 value := d.index(i) 476 switch { 477 case value.Less(minValue): 478 minValue = value 479 case maxValue.Less(value): 480 maxValue = value 481 } 482 } 483 484 min = d.makeValue(minValue) 485 max = d.makeValue(maxValue) 486 } 487 return min, max 488 } 489 490 func (d *int96Dictionary) Reset() { 491 d.values = d.values[:0] 492 d.hashmap = nil 493 } 494 495 func (d *int96Dictionary) Page() Page { 496 return &d.int96Page 497 } 498 499 type floatDictionary struct { 500 floatPage 501 table *hashprobe.Float32Table 502 } 503 504 func newFloatDictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *floatDictionary { 505 return &floatDictionary{ 506 floatPage: floatPage{ 507 typ: typ, 508 values: data.Float()[:numValues], 509 columnIndex: ^columnIndex, 510 }, 511 } 512 } 513 514 func (d *floatDictionary) Type() Type { return newIndexedType(d.typ, d) } 515 516 func (d *floatDictionary) Len() int { return len(d.values) } 517 518 func (d *floatDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 519 520 func (d *floatDictionary) index(i int32) float32 { return d.values[i] } 521 522 func (d *floatDictionary) Insert(indexes []int32, values []Value) { 523 model := Value{} 524 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 525 } 526 527 func (d *floatDictionary) init(indexes []int32) { 528 d.table = hashprobe.NewFloat32Table(len(d.values), hashprobeTableMaxLoad) 529 530 n := min(len(d.values), len(indexes)) 531 532 for i := 0; i < len(d.values); i += n { 533 j := min(i+n, len(d.values)) 534 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 535 } 536 } 537 538 func (d *floatDictionary) insert(indexes []int32, rows sparse.Array) { 539 const chunkSize = insertsTargetCacheFootprint / 4 540 541 if d.table == nil { 542 d.init(indexes) 543 } 544 545 values := rows.Float32Array() 546 547 for i := 0; i < values.Len(); i += chunkSize { 548 j := min(i+chunkSize, values.Len()) 549 550 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 551 for k, index := range indexes[i:j] { 552 if index == int32(len(d.values)) { 553 d.values = append(d.values, values.Index(i+k)) 554 } 555 } 556 } 557 } 558 } 559 560 func (d *floatDictionary) Lookup(indexes []int32, values []Value) { 561 model := d.makeValue(0) 562 memsetValues(values, model) 563 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 564 } 565 566 func (d *floatDictionary) Bounds(indexes []int32) (min, max Value) { 567 if len(indexes) > 0 { 568 minValue, maxValue := d.bounds(indexes) 569 min = d.makeValue(minValue) 570 max = d.makeValue(maxValue) 571 } 572 return min, max 573 } 574 575 func (d *floatDictionary) Reset() { 576 d.values = d.values[:0] 577 if d.table != nil { 578 d.table.Reset() 579 } 580 } 581 582 func (d *floatDictionary) Page() Page { 583 return &d.floatPage 584 } 585 586 type doubleDictionary struct { 587 doublePage 588 table *hashprobe.Float64Table 589 } 590 591 func newDoubleDictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *doubleDictionary { 592 return &doubleDictionary{ 593 doublePage: doublePage{ 594 typ: typ, 595 values: data.Double()[:numValues], 596 columnIndex: ^columnIndex, 597 }, 598 } 599 } 600 601 func (d *doubleDictionary) Type() Type { return newIndexedType(d.typ, d) } 602 603 func (d *doubleDictionary) Len() int { return len(d.values) } 604 605 func (d *doubleDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 606 607 func (d *doubleDictionary) index(i int32) float64 { return d.values[i] } 608 609 func (d *doubleDictionary) Insert(indexes []int32, values []Value) { 610 model := Value{} 611 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 612 } 613 614 func (d *doubleDictionary) init(indexes []int32) { 615 d.table = hashprobe.NewFloat64Table(len(d.values), hashprobeTableMaxLoad) 616 617 n := min(len(d.values), len(indexes)) 618 619 for i := 0; i < len(d.values); i += n { 620 j := min(i+n, len(d.values)) 621 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 622 } 623 } 624 625 func (d *doubleDictionary) insert(indexes []int32, rows sparse.Array) { 626 const chunkSize = insertsTargetCacheFootprint / 8 627 628 if d.table == nil { 629 d.init(indexes) 630 } 631 632 values := rows.Float64Array() 633 634 for i := 0; i < values.Len(); i += chunkSize { 635 j := min(i+chunkSize, values.Len()) 636 637 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 638 for k, index := range indexes[i:j] { 639 if index == int32(len(d.values)) { 640 d.values = append(d.values, values.Index(i+k)) 641 } 642 } 643 } 644 } 645 } 646 647 func (d *doubleDictionary) Lookup(indexes []int32, values []Value) { 648 model := d.makeValue(0) 649 memsetValues(values, model) 650 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 651 } 652 653 func (d *doubleDictionary) Bounds(indexes []int32) (min, max Value) { 654 if len(indexes) > 0 { 655 minValue, maxValue := d.bounds(indexes) 656 min = d.makeValue(minValue) 657 max = d.makeValue(maxValue) 658 } 659 return min, max 660 } 661 662 func (d *doubleDictionary) Reset() { 663 d.values = d.values[:0] 664 if d.table != nil { 665 d.table.Reset() 666 } 667 } 668 669 func (d *doubleDictionary) Page() Page { 670 return &d.doublePage 671 } 672 673 type byteArrayDictionary struct { 674 byteArrayPage 675 table map[string]int32 676 alloc allocator 677 } 678 679 func newByteArrayDictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *byteArrayDictionary { 680 values, offsets := data.ByteArray() 681 // The first offset must always be zero, and the last offset is the length 682 // of the values in bytes. 683 // 684 // As an optimization we make the assumption that the backing array of the 685 // offsets slice belongs to the dictionary. 686 switch { 687 case cap(offsets) == 0: 688 offsets = make([]uint32, 1, 8) 689 case len(offsets) == 0: 690 offsets = append(offsets[:0], 0) 691 } 692 return &byteArrayDictionary{ 693 byteArrayPage: byteArrayPage{ 694 typ: typ, 695 values: values, 696 offsets: offsets, 697 columnIndex: ^columnIndex, 698 }, 699 } 700 } 701 702 func (d *byteArrayDictionary) Type() Type { return newIndexedType(d.typ, d) } 703 704 func (d *byteArrayDictionary) Len() int { return d.len() } 705 706 func (d *byteArrayDictionary) Index(i int32) Value { return d.makeValueBytes(d.index(int(i))) } 707 708 func (d *byteArrayDictionary) Insert(indexes []int32, values []Value) { 709 model := Value{} 710 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) 711 } 712 713 func (d *byteArrayDictionary) init() { 714 numValues := d.len() 715 d.table = make(map[string]int32, numValues) 716 717 for i := 0; i < numValues; i++ { 718 d.table[string(d.index(i))] = int32(len(d.table)) 719 } 720 } 721 722 func (d *byteArrayDictionary) insert(indexes []int32, rows sparse.Array) { 723 if d.table == nil { 724 d.init() 725 } 726 727 values := rows.StringArray() 728 729 for i := range indexes { 730 value := values.Index(i) 731 732 index, exists := d.table[value] 733 if !exists { 734 value = d.alloc.copyString(value) 735 index = int32(len(d.table)) 736 d.table[value] = index 737 d.values = append(d.values, value...) 738 d.offsets = append(d.offsets, uint32(len(d.values))) 739 } 740 741 indexes[i] = index 742 } 743 } 744 745 func (d *byteArrayDictionary) Lookup(indexes []int32, values []Value) { 746 model := d.makeValueString("") 747 memsetValues(values, model) 748 d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) 749 } 750 751 func (d *byteArrayDictionary) Bounds(indexes []int32) (min, max Value) { 752 if len(indexes) > 0 { 753 base := d.index(int(indexes[0])) 754 minValue := unsafecast.BytesToString(base) 755 maxValue := minValue 756 values := [64]string{} 757 758 for i := 1; i < len(indexes); i += len(values) { 759 n := len(indexes) - i 760 if n > len(values) { 761 n = len(values) 762 } 763 j := i + n 764 d.lookupString(indexes[i:j:j], makeArrayString(values[:n:n])) 765 766 for _, value := range values[:n:n] { 767 switch { 768 case value < minValue: 769 minValue = value 770 case value > maxValue: 771 maxValue = value 772 } 773 } 774 } 775 776 min = d.makeValueString(minValue) 777 max = d.makeValueString(maxValue) 778 } 779 return min, max 780 } 781 782 func (d *byteArrayDictionary) Reset() { 783 d.offsets = d.offsets[:1] 784 d.values = d.values[:0] 785 for k := range d.table { 786 delete(d.table, k) 787 } 788 d.alloc.reset() 789 } 790 791 func (d *byteArrayDictionary) Page() Page { 792 return &d.byteArrayPage 793 } 794 795 type fixedLenByteArrayDictionary struct { 796 fixedLenByteArrayPage 797 hashmap map[string]int32 798 } 799 800 func newFixedLenByteArrayDictionary(typ Type, columnIndex int16, numValues int32, values encoding.Values) *fixedLenByteArrayDictionary { 801 data, size := values.FixedLenByteArray() 802 return &fixedLenByteArrayDictionary{ 803 fixedLenByteArrayPage: fixedLenByteArrayPage{ 804 typ: typ, 805 size: size, 806 data: data, 807 columnIndex: ^columnIndex, 808 }, 809 } 810 } 811 812 func (d *fixedLenByteArrayDictionary) Type() Type { return newIndexedType(d.typ, d) } 813 814 func (d *fixedLenByteArrayDictionary) Len() int { return len(d.data) / d.size } 815 816 func (d *fixedLenByteArrayDictionary) Index(i int32) Value { 817 return d.makeValueBytes(d.index(i)) 818 } 819 820 func (d *fixedLenByteArrayDictionary) index(i int32) []byte { 821 j := (int(i) + 0) * d.size 822 k := (int(i) + 1) * d.size 823 return d.data[j:k:k] 824 } 825 826 func (d *fixedLenByteArrayDictionary) Insert(indexes []int32, values []Value) { 827 d.insertValues(indexes, len(values), func(i int) *byte { 828 return values[i].ptr 829 }) 830 } 831 832 func (d *fixedLenByteArrayDictionary) insert(indexes []int32, rows sparse.Array) { 833 d.insertValues(indexes, rows.Len(), func(i int) *byte { 834 return (*byte)(rows.Index(i)) 835 }) 836 } 837 838 func (d *fixedLenByteArrayDictionary) insertValues(indexes []int32, count int, valueAt func(int) *byte) { 839 _ = indexes[:count] 840 841 if d.hashmap == nil { 842 d.hashmap = make(map[string]int32, cap(d.data)/d.size) 843 for i, j := 0, int32(0); i < len(d.data); i += d.size { 844 d.hashmap[string(d.data[i:i+d.size])] = j 845 j++ 846 } 847 } 848 849 for i := 0; i < count; i++ { 850 value := unsafe.Slice(valueAt(i), d.size) 851 852 index, exists := d.hashmap[string(value)] 853 if !exists { 854 index = int32(d.Len()) 855 start := len(d.data) 856 d.data = append(d.data, value...) 857 d.hashmap[string(d.data[start:])] = index 858 } 859 860 indexes[i] = index 861 } 862 } 863 864 func (d *fixedLenByteArrayDictionary) Lookup(indexes []int32, values []Value) { 865 model := d.makeValueString("") 866 memsetValues(values, model) 867 d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) 868 } 869 870 func (d *fixedLenByteArrayDictionary) Bounds(indexes []int32) (min, max Value) { 871 if len(indexes) > 0 { 872 base := d.index(indexes[0]) 873 minValue := unsafecast.BytesToString(base) 874 maxValue := minValue 875 values := [64]string{} 876 877 for i := 1; i < len(indexes); i += len(values) { 878 n := len(indexes) - i 879 if n > len(values) { 880 n = len(values) 881 } 882 j := i + n 883 d.lookupString(indexes[i:j:j], makeArrayString(values[:n:n])) 884 885 for _, value := range values[:n:n] { 886 switch { 887 case value < minValue: 888 minValue = value 889 case value > maxValue: 890 maxValue = value 891 } 892 } 893 } 894 895 min = d.makeValueString(minValue) 896 max = d.makeValueString(maxValue) 897 } 898 return min, max 899 } 900 901 func (d *fixedLenByteArrayDictionary) Reset() { 902 d.data = d.data[:0] 903 d.hashmap = nil 904 } 905 906 func (d *fixedLenByteArrayDictionary) Page() Page { 907 return &d.fixedLenByteArrayPage 908 } 909 910 type uint32Dictionary struct { 911 uint32Page 912 table *hashprobe.Uint32Table 913 } 914 915 func newUint32Dictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *uint32Dictionary { 916 return &uint32Dictionary{ 917 uint32Page: uint32Page{ 918 typ: typ, 919 values: data.Uint32()[:numValues], 920 columnIndex: ^columnIndex, 921 }, 922 } 923 } 924 925 func (d *uint32Dictionary) Type() Type { return newIndexedType(d.typ, d) } 926 927 func (d *uint32Dictionary) Len() int { return len(d.values) } 928 929 func (d *uint32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 930 931 func (d *uint32Dictionary) index(i int32) uint32 { return d.values[i] } 932 933 func (d *uint32Dictionary) Insert(indexes []int32, values []Value) { 934 model := Value{} 935 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 936 } 937 938 func (d *uint32Dictionary) init(indexes []int32) { 939 d.table = hashprobe.NewUint32Table(len(d.values), hashprobeTableMaxLoad) 940 941 n := min(len(d.values), len(indexes)) 942 943 for i := 0; i < len(d.values); i += n { 944 j := min(i+n, len(d.values)) 945 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 946 } 947 } 948 949 func (d *uint32Dictionary) insert(indexes []int32, rows sparse.Array) { 950 const chunkSize = insertsTargetCacheFootprint / 4 951 952 if d.table == nil { 953 d.init(indexes) 954 } 955 956 values := rows.Uint32Array() 957 958 for i := 0; i < values.Len(); i += chunkSize { 959 j := min(i+chunkSize, values.Len()) 960 961 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 962 for k, index := range indexes[i:j] { 963 if index == int32(len(d.values)) { 964 d.values = append(d.values, values.Index(i+k)) 965 } 966 } 967 } 968 } 969 } 970 971 func (d *uint32Dictionary) Lookup(indexes []int32, values []Value) { 972 model := d.makeValue(0) 973 memsetValues(values, model) 974 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 975 } 976 977 func (d *uint32Dictionary) Bounds(indexes []int32) (min, max Value) { 978 if len(indexes) > 0 { 979 minValue, maxValue := d.bounds(indexes) 980 min = d.makeValue(minValue) 981 max = d.makeValue(maxValue) 982 } 983 return min, max 984 } 985 986 func (d *uint32Dictionary) Reset() { 987 d.values = d.values[:0] 988 if d.table != nil { 989 d.table.Reset() 990 } 991 } 992 993 func (d *uint32Dictionary) Page() Page { 994 return &d.uint32Page 995 } 996 997 type uint64Dictionary struct { 998 uint64Page 999 table *hashprobe.Uint64Table 1000 } 1001 1002 func newUint64Dictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *uint64Dictionary { 1003 return &uint64Dictionary{ 1004 uint64Page: uint64Page{ 1005 typ: typ, 1006 values: data.Uint64()[:numValues], 1007 columnIndex: ^columnIndex, 1008 }, 1009 } 1010 } 1011 1012 func (d *uint64Dictionary) Type() Type { return newIndexedType(d.typ, d) } 1013 1014 func (d *uint64Dictionary) Len() int { return len(d.values) } 1015 1016 func (d *uint64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 1017 1018 func (d *uint64Dictionary) index(i int32) uint64 { return d.values[i] } 1019 1020 func (d *uint64Dictionary) Insert(indexes []int32, values []Value) { 1021 model := Value{} 1022 d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 1023 } 1024 1025 func (d *uint64Dictionary) init(indexes []int32) { 1026 d.table = hashprobe.NewUint64Table(len(d.values), hashprobeTableMaxLoad) 1027 1028 n := min(len(d.values), len(indexes)) 1029 1030 for i := 0; i < len(d.values); i += n { 1031 j := min(i+n, len(d.values)) 1032 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 1033 } 1034 } 1035 1036 func (d *uint64Dictionary) insert(indexes []int32, rows sparse.Array) { 1037 const chunkSize = insertsTargetCacheFootprint / 8 1038 1039 if d.table == nil { 1040 d.init(indexes) 1041 } 1042 1043 values := rows.Uint64Array() 1044 1045 for i := 0; i < values.Len(); i += chunkSize { 1046 j := min(i+chunkSize, values.Len()) 1047 1048 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 1049 for k, index := range indexes[i:j] { 1050 if index == int32(len(d.values)) { 1051 d.values = append(d.values, values.Index(i+k)) 1052 } 1053 } 1054 } 1055 } 1056 } 1057 1058 func (d *uint64Dictionary) Lookup(indexes []int32, values []Value) { 1059 model := d.makeValue(0) 1060 memsetValues(values, model) 1061 d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) 1062 } 1063 1064 func (d *uint64Dictionary) Bounds(indexes []int32) (min, max Value) { 1065 if len(indexes) > 0 { 1066 minValue, maxValue := d.bounds(indexes) 1067 min = d.makeValue(minValue) 1068 max = d.makeValue(maxValue) 1069 } 1070 return min, max 1071 } 1072 1073 func (d *uint64Dictionary) Reset() { 1074 d.values = d.values[:0] 1075 if d.table != nil { 1076 d.table.Reset() 1077 } 1078 } 1079 1080 func (d *uint64Dictionary) Page() Page { 1081 return &d.uint64Page 1082 } 1083 1084 type be128Dictionary struct { 1085 be128Page 1086 table *hashprobe.Uint128Table 1087 } 1088 1089 func newBE128Dictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *be128Dictionary { 1090 return &be128Dictionary{ 1091 be128Page: be128Page{ 1092 typ: typ, 1093 values: data.Uint128()[:numValues], 1094 columnIndex: ^columnIndex, 1095 }, 1096 } 1097 } 1098 1099 func (d *be128Dictionary) Type() Type { return newIndexedType(d.typ, d) } 1100 1101 func (d *be128Dictionary) Len() int { return len(d.values) } 1102 1103 func (d *be128Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } 1104 1105 func (d *be128Dictionary) index(i int32) *[16]byte { return &d.values[i] } 1106 1107 func (d *be128Dictionary) Insert(indexes []int32, values []Value) { 1108 _ = indexes[:len(values)] 1109 1110 for _, v := range values { 1111 if v.kind != ^int8(FixedLenByteArray) { 1112 panic("values inserted in BE128 dictionary must be of type BYTE_ARRAY") 1113 } 1114 if v.u64 != 16 { 1115 panic("values inserted in BE128 dictionary must be of length 16") 1116 } 1117 } 1118 1119 if d.table == nil { 1120 d.init(indexes) 1121 } 1122 1123 const chunkSize = insertsTargetCacheFootprint / 16 1124 var buffer [chunkSize][16]byte 1125 1126 for i := 0; i < len(values); i += chunkSize { 1127 j := min(chunkSize+i, len(values)) 1128 n := min(chunkSize, len(values)-i) 1129 1130 probe := buffer[:n:n] 1131 writePointersBE128(probe, makeArrayValue(values[i:j], unsafe.Offsetof(values[i].ptr))) 1132 1133 if d.table.Probe(probe, indexes[i:j:j]) > 0 { 1134 for k, v := range probe { 1135 if indexes[i+k] == int32(len(d.values)) { 1136 d.values = append(d.values, v) 1137 } 1138 } 1139 } 1140 } 1141 } 1142 1143 func (d *be128Dictionary) init(indexes []int32) { 1144 d.table = hashprobe.NewUint128Table(len(d.values), 0.75) 1145 1146 n := min(len(d.values), len(indexes)) 1147 1148 for i := 0; i < len(d.values); i += n { 1149 j := min(i+n, len(d.values)) 1150 d.table.Probe(d.values[i:j:j], indexes[:n:n]) 1151 } 1152 } 1153 1154 func (d *be128Dictionary) insert(indexes []int32, rows sparse.Array) { 1155 const chunkSize = insertsTargetCacheFootprint / 16 1156 1157 if d.table == nil { 1158 d.init(indexes) 1159 } 1160 1161 values := rows.Uint128Array() 1162 1163 for i := 0; i < values.Len(); i += chunkSize { 1164 j := min(i+chunkSize, values.Len()) 1165 1166 if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { 1167 for k, index := range indexes[i:j] { 1168 if index == int32(len(d.values)) { 1169 d.values = append(d.values, values.Index(i+k)) 1170 } 1171 } 1172 } 1173 } 1174 } 1175 1176 func (d *be128Dictionary) Lookup(indexes []int32, values []Value) { 1177 model := d.makeValueString("") 1178 memsetValues(values, model) 1179 d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) 1180 } 1181 1182 func (d *be128Dictionary) Bounds(indexes []int32) (min, max Value) { 1183 if len(indexes) > 0 { 1184 minValue, maxValue := d.bounds(indexes) 1185 min = d.makeValue(minValue) 1186 max = d.makeValue(maxValue) 1187 } 1188 return min, max 1189 } 1190 1191 func (d *be128Dictionary) Reset() { 1192 d.values = d.values[:0] 1193 if d.table != nil { 1194 d.table.Reset() 1195 } 1196 } 1197 1198 func (d *be128Dictionary) Page() Page { 1199 return &d.be128Page 1200 } 1201 1202 // indexedType is a wrapper around a Type value which overrides object 1203 // constructors to use indexed versions referencing values in the dictionary 1204 // instead of storing plain values. 1205 type indexedType struct { 1206 Type 1207 dict Dictionary 1208 } 1209 1210 func newIndexedType(typ Type, dict Dictionary) *indexedType { 1211 return &indexedType{Type: typ, dict: dict} 1212 } 1213 1214 func (t *indexedType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer { 1215 return newIndexedColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues)) 1216 } 1217 1218 func (t *indexedType) NewPage(columnIndex, numValues int, data encoding.Values) Page { 1219 return newIndexedPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data) 1220 } 1221 1222 // indexedPage is an implementation of the Page interface which stores 1223 // indexes instead of plain value. The indexes reference the values in a 1224 // dictionary that the page was created for. 1225 type indexedPage struct { 1226 typ *indexedType 1227 values []int32 1228 columnIndex int16 1229 } 1230 1231 func newIndexedPage(typ *indexedType, columnIndex int16, numValues int32, data encoding.Values) *indexedPage { 1232 // RLE encoded values that contain dictionary indexes in data pages are 1233 // sometimes truncated when they contain only zeros. We account for this 1234 // special case here and extend the values buffer if it is shorter than 1235 // needed to hold `numValues`. 1236 size := int(numValues) 1237 values := data.Int32() 1238 1239 if len(values) < size { 1240 if cap(values) < size { 1241 tmp := make([]int32, size) 1242 copy(tmp, values) 1243 values = tmp 1244 } else { 1245 clear := values[len(values) : len(values)+size] 1246 for i := range clear { 1247 clear[i] = 0 1248 } 1249 } 1250 } 1251 1252 return &indexedPage{ 1253 typ: typ, 1254 values: values[:size], 1255 columnIndex: ^columnIndex, 1256 } 1257 } 1258 1259 func (page *indexedPage) Type() Type { return indexedPageType{page.typ} } 1260 1261 func (page *indexedPage) Column() int { return int(^page.columnIndex) } 1262 1263 func (page *indexedPage) Dictionary() Dictionary { return page.typ.dict } 1264 1265 func (page *indexedPage) NumRows() int64 { return int64(len(page.values)) } 1266 1267 func (page *indexedPage) NumValues() int64 { return int64(len(page.values)) } 1268 1269 func (page *indexedPage) NumNulls() int64 { return 0 } 1270 1271 func (page *indexedPage) Size() int64 { return 4 * int64(len(page.values)) } 1272 1273 func (page *indexedPage) RepetitionLevels() []byte { return nil } 1274 1275 func (page *indexedPage) DefinitionLevels() []byte { return nil } 1276 1277 func (page *indexedPage) Data() encoding.Values { return encoding.Int32Values(page.values) } 1278 1279 func (page *indexedPage) Values() ValueReader { return &indexedPageValues{page: page} } 1280 1281 func (page *indexedPage) Bounds() (min, max Value, ok bool) { 1282 if ok = len(page.values) > 0; ok { 1283 min, max = page.typ.dict.Bounds(page.values) 1284 min.columnIndex = page.columnIndex 1285 max.columnIndex = page.columnIndex 1286 } 1287 return min, max, ok 1288 } 1289 1290 func (page *indexedPage) Slice(i, j int64) Page { 1291 return &indexedPage{ 1292 typ: page.typ, 1293 values: page.values[i:j], 1294 columnIndex: page.columnIndex, 1295 } 1296 } 1297 1298 // indexedPageType is an adapter for the indexedType returned when accessing 1299 // the type of an indexedPage value. It overrides the Encode/Decode methods to 1300 // account for the fact that an indexed page is holding indexes of values into 1301 // its dictionary instead of plain values. 1302 type indexedPageType struct{ *indexedType } 1303 1304 func (t indexedPageType) NewValues(values []byte, _ []uint32) encoding.Values { 1305 return encoding.Int32ValuesFromBytes(values) 1306 } 1307 1308 func (t indexedPageType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) { 1309 return encoding.EncodeInt32(dst, src, enc) 1310 } 1311 1312 func (t indexedPageType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) { 1313 return encoding.DecodeInt32(dst, src, enc) 1314 } 1315 1316 func (t indexedPageType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int { 1317 return Int32Type.EstimateDecodeSize(numValues, src, enc) 1318 } 1319 1320 type indexedPageValues struct { 1321 page *indexedPage 1322 offset int 1323 } 1324 1325 func (r *indexedPageValues) ReadValues(values []Value) (n int, err error) { 1326 if n = len(r.page.values) - r.offset; n == 0 { 1327 return 0, io.EOF 1328 } 1329 if n > len(values) { 1330 n = len(values) 1331 } 1332 r.page.typ.dict.Lookup(r.page.values[r.offset:r.offset+n], values[:n]) 1333 r.offset += n 1334 if r.offset == len(r.page.values) { 1335 err = io.EOF 1336 } 1337 return n, err 1338 } 1339 1340 // indexedColumnBuffer is an implementation of the ColumnBuffer interface which 1341 // builds a page of indexes into a parent dictionary when values are written. 1342 type indexedColumnBuffer struct{ indexedPage } 1343 1344 func newIndexedColumnBuffer(typ *indexedType, columnIndex int16, numValues int32) *indexedColumnBuffer { 1345 return &indexedColumnBuffer{ 1346 indexedPage: indexedPage{ 1347 typ: typ, 1348 values: make([]int32, 0, numValues), 1349 columnIndex: ^columnIndex, 1350 }, 1351 } 1352 } 1353 1354 func (col *indexedColumnBuffer) Clone() ColumnBuffer { 1355 return &indexedColumnBuffer{ 1356 indexedPage: indexedPage{ 1357 typ: col.typ, 1358 values: append([]int32{}, col.values...), 1359 columnIndex: col.columnIndex, 1360 }, 1361 } 1362 } 1363 1364 func (col *indexedColumnBuffer) Type() Type { return col.typ.Type } 1365 1366 func (col *indexedColumnBuffer) ColumnIndex() ColumnIndex { return indexedColumnIndex{col} } 1367 1368 func (col *indexedColumnBuffer) OffsetIndex() OffsetIndex { return indexedOffsetIndex{col} } 1369 1370 func (col *indexedColumnBuffer) BloomFilter() BloomFilter { return nil } 1371 1372 func (col *indexedColumnBuffer) Dictionary() Dictionary { return col.typ.dict } 1373 1374 func (col *indexedColumnBuffer) Pages() Pages { return onePage(col.Page()) } 1375 1376 func (col *indexedColumnBuffer) Page() Page { return &col.indexedPage } 1377 1378 func (col *indexedColumnBuffer) Reset() { col.values = col.values[:0] } 1379 1380 func (col *indexedColumnBuffer) Cap() int { return cap(col.values) } 1381 1382 func (col *indexedColumnBuffer) Len() int { return len(col.values) } 1383 1384 func (col *indexedColumnBuffer) Less(i, j int) bool { 1385 u := col.typ.dict.Index(col.values[i]) 1386 v := col.typ.dict.Index(col.values[j]) 1387 return col.typ.Compare(u, v) < 0 1388 } 1389 1390 func (col *indexedColumnBuffer) Swap(i, j int) { 1391 col.values[i], col.values[j] = col.values[j], col.values[i] 1392 } 1393 1394 func (col *indexedColumnBuffer) WriteValues(values []Value) (int, error) { 1395 i := len(col.values) 1396 j := len(col.values) + len(values) 1397 1398 if j <= cap(col.values) { 1399 col.values = col.values[:j] 1400 } else { 1401 tmp := make([]int32, j, 2*j) 1402 copy(tmp, col.values) 1403 col.values = tmp 1404 } 1405 1406 col.typ.dict.Insert(col.values[i:], values) 1407 return len(values), nil 1408 } 1409 1410 func (col *indexedColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { 1411 i := len(col.values) 1412 j := len(col.values) + rows.Len() 1413 1414 if j <= cap(col.values) { 1415 col.values = col.values[:j] 1416 } else { 1417 tmp := make([]int32, j, 2*j) 1418 copy(tmp, col.values) 1419 col.values = tmp 1420 } 1421 1422 col.typ.dict.insert(col.values[i:], rows) 1423 } 1424 1425 func (col *indexedColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { 1426 i := int(offset) 1427 switch { 1428 case i < 0: 1429 return 0, errRowIndexOutOfBounds(offset, int64(len(col.values))) 1430 case i >= len(col.values): 1431 return 0, io.EOF 1432 default: 1433 for n < len(values) && i < len(col.values) { 1434 values[n] = col.typ.dict.Index(col.values[i]) 1435 values[n].columnIndex = col.columnIndex 1436 n++ 1437 i++ 1438 } 1439 if n < len(values) { 1440 err = io.EOF 1441 } 1442 return n, err 1443 } 1444 } 1445 1446 func (col *indexedColumnBuffer) ReadRowAt(row Row, index int64) (Row, error) { 1447 switch { 1448 case index < 0: 1449 return row, errRowIndexOutOfBounds(index, int64(len(col.values))) 1450 case index >= int64(len(col.values)): 1451 return row, io.EOF 1452 default: 1453 v := col.typ.dict.Index(col.values[index]) 1454 v.columnIndex = col.columnIndex 1455 return append(row, v), nil 1456 } 1457 } 1458 1459 type indexedColumnIndex struct{ col *indexedColumnBuffer } 1460 1461 func (index indexedColumnIndex) NumPages() int { return 1 } 1462 func (index indexedColumnIndex) NullCount(int) int64 { return 0 } 1463 func (index indexedColumnIndex) NullPage(int) bool { return false } 1464 func (index indexedColumnIndex) MinValue(int) Value { 1465 min, _, _ := index.col.Bounds() 1466 return min 1467 } 1468 func (index indexedColumnIndex) MaxValue(int) Value { 1469 _, max, _ := index.col.Bounds() 1470 return max 1471 } 1472 func (index indexedColumnIndex) IsAscending() bool { 1473 min, max, _ := index.col.Bounds() 1474 return index.col.typ.Compare(min, max) <= 0 1475 } 1476 func (index indexedColumnIndex) IsDescending() bool { 1477 min, max, _ := index.col.Bounds() 1478 return index.col.typ.Compare(min, max) > 0 1479 } 1480 1481 type indexedOffsetIndex struct{ col *indexedColumnBuffer } 1482 1483 func (index indexedOffsetIndex) NumPages() int { return 1 } 1484 func (index indexedOffsetIndex) Offset(int) int64 { return 0 } 1485 func (index indexedOffsetIndex) CompressedPageSize(int) int64 { return index.col.Size() } 1486 func (index indexedOffsetIndex) FirstRowIndex(int) int64 { return 0 }