github.com/apache/arrow/go/v10@v10.0.1/internal/hashing/xxh3_memo_table.gen.go (about) 1 // Code generated by xxh3_memo_table.gen.go.tmpl. DO NOT EDIT. 2 3 // Licensed to the Apache Software Foundation (ASF) under one 4 // or more contributor license agreements. See the NOTICE file 5 // distributed with this work for additional information 6 // regarding copyright ownership. The ASF licenses this file 7 // to you under the Apache License, Version 2.0 (the 8 // "License"); you may not use this file except in compliance 9 // with the License. You may obtain a copy of the License at 10 // 11 // http://www.apache.org/licenses/LICENSE-2.0 12 // 13 // Unless required by applicable law or agreed to in writing, software 14 // distributed under the License is distributed on an "AS IS" BASIS, 15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 // See the License for the specific language governing permissions and 17 // limitations under the License. 18 19 package hashing 20 21 import ( 22 "math" 23 24 "github.com/apache/arrow/go/v10/arrow" 25 "github.com/apache/arrow/go/v10/arrow/bitutil" 26 "github.com/apache/arrow/go/v10/internal/utils" 27 ) 28 29 type payloadInt8 struct { 30 val int8 31 memoIdx int32 32 } 33 34 type entryInt8 struct { 35 h uint64 36 payload payloadInt8 37 } 38 39 func (e entryInt8) Valid() bool { return e.h != sentinel } 40 41 // Int8HashTable is a hashtable specifically for int8 that 42 // is utilized with the MemoTable to generalize interactions for easier 43 // implementation of dictionaries without losing performance. 44 type Int8HashTable struct { 45 cap uint64 46 capMask uint64 47 size uint64 48 49 entries []entryInt8 50 } 51 52 // NewInt8HashTable returns a new hash table for int8 values 53 // initialized with the passed in capacity or 32 whichever is larger. 54 func NewInt8HashTable(cap uint64) *Int8HashTable { 55 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 56 ret := &Int8HashTable{cap: initCap, capMask: initCap - 1, size: 0} 57 ret.entries = make([]entryInt8, initCap) 58 return ret 59 } 60 61 // Reset drops all of the values in this hash table and re-initializes it 62 // with the specified initial capacity as if by calling New, but without having 63 // to reallocate the object. 64 func (h *Int8HashTable) Reset(cap uint64) { 65 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 66 h.capMask = h.cap - 1 67 h.size = 0 68 h.entries = make([]entryInt8, h.cap) 69 } 70 71 // CopyValues is used for copying the values out of the hash table into the 72 // passed in slice, in the order that they were first inserted 73 func (h *Int8HashTable) CopyValues(out []int8) { 74 h.CopyValuesSubset(0, out) 75 } 76 77 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 78 // with the value at start, in the order that they were inserted. 79 func (h *Int8HashTable) CopyValuesSubset(start int, out []int8) { 80 h.VisitEntries(func(e *entryInt8) { 81 idx := e.payload.memoIdx - int32(start) 82 if idx >= 0 { 83 out[idx] = e.payload.val 84 } 85 }) 86 } 87 88 func (h *Int8HashTable) WriteOut(out []byte) { 89 h.WriteOutSubset(0, out) 90 } 91 92 func (h *Int8HashTable) WriteOutSubset(start int, out []byte) { 93 data := arrow.Int8Traits.CastFromBytes(out) 94 h.VisitEntries(func(e *entryInt8) { 95 idx := e.payload.memoIdx - int32(start) 96 if idx >= 0 { 97 data[idx] = e.payload.val 98 } 99 }) 100 } 101 102 func (h *Int8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 103 104 func (Int8HashTable) fixHash(v uint64) uint64 { 105 if v == sentinel { 106 return 42 107 } 108 return v 109 } 110 111 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 112 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 113 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 114 func (h *Int8HashTable) Lookup(v uint64, cmp func(int8) bool) (*entryInt8, bool) { 115 idx, ok := h.lookup(v, h.capMask, cmp) 116 return &h.entries[idx], ok 117 } 118 119 func (h *Int8HashTable) lookup(v uint64, szMask uint64, cmp func(int8) bool) (uint64, bool) { 120 const perturbShift uint8 = 5 121 122 var ( 123 idx uint64 124 perturb uint64 125 e *entryInt8 126 ) 127 128 v = h.fixHash(v) 129 idx = v & szMask 130 perturb = (v >> uint64(perturbShift)) + 1 131 132 for { 133 e = &h.entries[idx] 134 if e.h == v && cmp(e.payload.val) { 135 return idx, true 136 } 137 138 if e.h == sentinel { 139 return idx, false 140 } 141 142 // perturbation logic inspired from CPython's set/dict object 143 // the goal is that all 64 bits of unmasked hash value eventually 144 // participate int he probing sequence, to minimize clustering 145 idx = (idx + perturb) & szMask 146 perturb = (perturb >> uint64(perturbShift)) + 1 147 } 148 } 149 150 func (h *Int8HashTable) upsize(newcap uint64) error { 151 newMask := newcap - 1 152 153 oldEntries := h.entries 154 h.entries = make([]entryInt8, newcap) 155 for _, e := range oldEntries { 156 if e.Valid() { 157 idx, _ := h.lookup(e.h, newMask, func(int8) bool { return false }) 158 h.entries[idx] = e 159 } 160 } 161 h.cap = newcap 162 h.capMask = newMask 163 return nil 164 } 165 166 // Insert updates the given entry with the provided hash value, payload value and memo index. 167 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 168 func (h *Int8HashTable) Insert(e *entryInt8, v uint64, val int8, memoIdx int32) error { 169 e.h = h.fixHash(v) 170 e.payload.val = val 171 e.payload.memoIdx = memoIdx 172 h.size++ 173 174 if h.needUpsize() { 175 h.upsize(h.cap * uint64(loadFactor) * 2) 176 } 177 return nil 178 } 179 180 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 181 // a valid entry being one which has had a value inserted into it. 182 func (h *Int8HashTable) VisitEntries(visit func(*entryInt8)) { 183 for _, e := range h.entries { 184 if e.Valid() { 185 visit(&e) 186 } 187 } 188 } 189 190 // Int8MemoTable is a wrapper over the appropriate hashtable to provide an interface 191 // conforming to the MemoTable interface defined in the encoding package for general interactions 192 // regarding dictionaries. 193 type Int8MemoTable struct { 194 tbl *Int8HashTable 195 nullIdx int32 196 } 197 198 // NewInt8MemoTable returns a new memotable with num entries pre-allocated to reduce further 199 // allocations when inserting. 200 func NewInt8MemoTable(num int64) *Int8MemoTable { 201 return &Int8MemoTable{tbl: NewInt8HashTable(uint64(num)), nullIdx: KeyNotFound} 202 } 203 204 func (Int8MemoTable) TypeTraits() TypeTraits { 205 return arrow.Int8Traits 206 } 207 208 // Reset allows this table to be re-used by dumping all the data currently in the table. 209 func (s *Int8MemoTable) Reset() { 210 s.tbl.Reset(32) 211 s.nullIdx = KeyNotFound 212 } 213 214 // Size returns the current number of inserted elements into the table including if a null 215 // has been inserted. 216 func (s *Int8MemoTable) Size() int { 217 sz := int(s.tbl.size) 218 if _, ok := s.GetNull(); ok { 219 sz++ 220 } 221 return sz 222 } 223 224 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 225 // that will be true if found and false if not. 226 func (s *Int8MemoTable) GetNull() (int, bool) { 227 return int(s.nullIdx), s.nullIdx != KeyNotFound 228 } 229 230 // GetOrInsertNull will return the index of the null entry or insert a null entry 231 // if one currently doesn't exist. The found value will be true if there was already 232 // a null in the table, and false if it inserted one. 233 func (s *Int8MemoTable) GetOrInsertNull() (idx int, found bool) { 234 idx, found = s.GetNull() 235 if !found { 236 idx = s.Size() 237 s.nullIdx = int32(idx) 238 } 239 return 240 } 241 242 // CopyValues will copy the values from the memo table out into the passed in slice 243 // which must be of the appropriate type. 244 func (s *Int8MemoTable) CopyValues(out interface{}) { 245 s.CopyValuesSubset(0, out) 246 } 247 248 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 249 // at the provided start index 250 func (s *Int8MemoTable) CopyValuesSubset(start int, out interface{}) { 251 s.tbl.CopyValuesSubset(start, out.([]int8)) 252 } 253 254 func (s *Int8MemoTable) WriteOut(out []byte) { 255 s.tbl.CopyValues(arrow.Int8Traits.CastFromBytes(out)) 256 } 257 258 func (s *Int8MemoTable) WriteOutSubset(start int, out []byte) { 259 s.tbl.CopyValuesSubset(start, arrow.Int8Traits.CastFromBytes(out)) 260 } 261 262 func (s *Int8MemoTable) WriteOutLE(out []byte) { 263 s.tbl.WriteOut(out) 264 } 265 266 func (s *Int8MemoTable) WriteOutSubsetLE(start int, out []byte) { 267 s.tbl.WriteOutSubset(start, out) 268 } 269 270 // Get returns the index of the requested value in the hash table or KeyNotFound 271 // along with a boolean indicating if it was found or not. 272 func (s *Int8MemoTable) Get(val interface{}) (int, bool) { 273 274 h := hashInt(uint64(val.(int8)), 0) 275 if e, ok := s.tbl.Lookup(h, func(v int8) bool { return val.(int8) == v }); ok { 276 return int(e.payload.memoIdx), ok 277 } 278 return KeyNotFound, false 279 } 280 281 // GetOrInsert will return the index of the specified value in the table, or insert the 282 // value into the table and return the new index. found indicates whether or not it already 283 // existed in the table (true) or was inserted by this call (false). 284 func (s *Int8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 285 286 h := hashInt(uint64(val.(int8)), 0) 287 e, ok := s.tbl.Lookup(h, func(v int8) bool { 288 return val.(int8) == v 289 }) 290 291 if ok { 292 idx = int(e.payload.memoIdx) 293 found = true 294 } else { 295 idx = s.Size() 296 s.tbl.Insert(e, h, val.(int8), int32(idx)) 297 } 298 return 299 } 300 301 type payloadUint8 struct { 302 val uint8 303 memoIdx int32 304 } 305 306 type entryUint8 struct { 307 h uint64 308 payload payloadUint8 309 } 310 311 func (e entryUint8) Valid() bool { return e.h != sentinel } 312 313 // Uint8HashTable is a hashtable specifically for uint8 that 314 // is utilized with the MemoTable to generalize interactions for easier 315 // implementation of dictionaries without losing performance. 316 type Uint8HashTable struct { 317 cap uint64 318 capMask uint64 319 size uint64 320 321 entries []entryUint8 322 } 323 324 // NewUint8HashTable returns a new hash table for uint8 values 325 // initialized with the passed in capacity or 32 whichever is larger. 326 func NewUint8HashTable(cap uint64) *Uint8HashTable { 327 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 328 ret := &Uint8HashTable{cap: initCap, capMask: initCap - 1, size: 0} 329 ret.entries = make([]entryUint8, initCap) 330 return ret 331 } 332 333 // Reset drops all of the values in this hash table and re-initializes it 334 // with the specified initial capacity as if by calling New, but without having 335 // to reallocate the object. 336 func (h *Uint8HashTable) Reset(cap uint64) { 337 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 338 h.capMask = h.cap - 1 339 h.size = 0 340 h.entries = make([]entryUint8, h.cap) 341 } 342 343 // CopyValues is used for copying the values out of the hash table into the 344 // passed in slice, in the order that they were first inserted 345 func (h *Uint8HashTable) CopyValues(out []uint8) { 346 h.CopyValuesSubset(0, out) 347 } 348 349 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 350 // with the value at start, in the order that they were inserted. 351 func (h *Uint8HashTable) CopyValuesSubset(start int, out []uint8) { 352 h.VisitEntries(func(e *entryUint8) { 353 idx := e.payload.memoIdx - int32(start) 354 if idx >= 0 { 355 out[idx] = e.payload.val 356 } 357 }) 358 } 359 360 func (h *Uint8HashTable) WriteOut(out []byte) { 361 h.WriteOutSubset(0, out) 362 } 363 364 func (h *Uint8HashTable) WriteOutSubset(start int, out []byte) { 365 data := arrow.Uint8Traits.CastFromBytes(out) 366 h.VisitEntries(func(e *entryUint8) { 367 idx := e.payload.memoIdx - int32(start) 368 if idx >= 0 { 369 data[idx] = e.payload.val 370 } 371 }) 372 } 373 374 func (h *Uint8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 375 376 func (Uint8HashTable) fixHash(v uint64) uint64 { 377 if v == sentinel { 378 return 42 379 } 380 return v 381 } 382 383 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 384 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 385 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 386 func (h *Uint8HashTable) Lookup(v uint64, cmp func(uint8) bool) (*entryUint8, bool) { 387 idx, ok := h.lookup(v, h.capMask, cmp) 388 return &h.entries[idx], ok 389 } 390 391 func (h *Uint8HashTable) lookup(v uint64, szMask uint64, cmp func(uint8) bool) (uint64, bool) { 392 const perturbShift uint8 = 5 393 394 var ( 395 idx uint64 396 perturb uint64 397 e *entryUint8 398 ) 399 400 v = h.fixHash(v) 401 idx = v & szMask 402 perturb = (v >> uint64(perturbShift)) + 1 403 404 for { 405 e = &h.entries[idx] 406 if e.h == v && cmp(e.payload.val) { 407 return idx, true 408 } 409 410 if e.h == sentinel { 411 return idx, false 412 } 413 414 // perturbation logic inspired from CPython's set/dict object 415 // the goal is that all 64 bits of unmasked hash value eventually 416 // participate int he probing sequence, to minimize clustering 417 idx = (idx + perturb) & szMask 418 perturb = (perturb >> uint64(perturbShift)) + 1 419 } 420 } 421 422 func (h *Uint8HashTable) upsize(newcap uint64) error { 423 newMask := newcap - 1 424 425 oldEntries := h.entries 426 h.entries = make([]entryUint8, newcap) 427 for _, e := range oldEntries { 428 if e.Valid() { 429 idx, _ := h.lookup(e.h, newMask, func(uint8) bool { return false }) 430 h.entries[idx] = e 431 } 432 } 433 h.cap = newcap 434 h.capMask = newMask 435 return nil 436 } 437 438 // Insert updates the given entry with the provided hash value, payload value and memo index. 439 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 440 func (h *Uint8HashTable) Insert(e *entryUint8, v uint64, val uint8, memoIdx int32) error { 441 e.h = h.fixHash(v) 442 e.payload.val = val 443 e.payload.memoIdx = memoIdx 444 h.size++ 445 446 if h.needUpsize() { 447 h.upsize(h.cap * uint64(loadFactor) * 2) 448 } 449 return nil 450 } 451 452 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 453 // a valid entry being one which has had a value inserted into it. 454 func (h *Uint8HashTable) VisitEntries(visit func(*entryUint8)) { 455 for _, e := range h.entries { 456 if e.Valid() { 457 visit(&e) 458 } 459 } 460 } 461 462 // Uint8MemoTable is a wrapper over the appropriate hashtable to provide an interface 463 // conforming to the MemoTable interface defined in the encoding package for general interactions 464 // regarding dictionaries. 465 type Uint8MemoTable struct { 466 tbl *Uint8HashTable 467 nullIdx int32 468 } 469 470 // NewUint8MemoTable returns a new memotable with num entries pre-allocated to reduce further 471 // allocations when inserting. 472 func NewUint8MemoTable(num int64) *Uint8MemoTable { 473 return &Uint8MemoTable{tbl: NewUint8HashTable(uint64(num)), nullIdx: KeyNotFound} 474 } 475 476 func (Uint8MemoTable) TypeTraits() TypeTraits { 477 return arrow.Uint8Traits 478 } 479 480 // Reset allows this table to be re-used by dumping all the data currently in the table. 481 func (s *Uint8MemoTable) Reset() { 482 s.tbl.Reset(32) 483 s.nullIdx = KeyNotFound 484 } 485 486 // Size returns the current number of inserted elements into the table including if a null 487 // has been inserted. 488 func (s *Uint8MemoTable) Size() int { 489 sz := int(s.tbl.size) 490 if _, ok := s.GetNull(); ok { 491 sz++ 492 } 493 return sz 494 } 495 496 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 497 // that will be true if found and false if not. 498 func (s *Uint8MemoTable) GetNull() (int, bool) { 499 return int(s.nullIdx), s.nullIdx != KeyNotFound 500 } 501 502 // GetOrInsertNull will return the index of the null entry or insert a null entry 503 // if one currently doesn't exist. The found value will be true if there was already 504 // a null in the table, and false if it inserted one. 505 func (s *Uint8MemoTable) GetOrInsertNull() (idx int, found bool) { 506 idx, found = s.GetNull() 507 if !found { 508 idx = s.Size() 509 s.nullIdx = int32(idx) 510 } 511 return 512 } 513 514 // CopyValues will copy the values from the memo table out into the passed in slice 515 // which must be of the appropriate type. 516 func (s *Uint8MemoTable) CopyValues(out interface{}) { 517 s.CopyValuesSubset(0, out) 518 } 519 520 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 521 // at the provided start index 522 func (s *Uint8MemoTable) CopyValuesSubset(start int, out interface{}) { 523 s.tbl.CopyValuesSubset(start, out.([]uint8)) 524 } 525 526 func (s *Uint8MemoTable) WriteOut(out []byte) { 527 s.tbl.CopyValues(arrow.Uint8Traits.CastFromBytes(out)) 528 } 529 530 func (s *Uint8MemoTable) WriteOutSubset(start int, out []byte) { 531 s.tbl.CopyValuesSubset(start, arrow.Uint8Traits.CastFromBytes(out)) 532 } 533 534 func (s *Uint8MemoTable) WriteOutLE(out []byte) { 535 s.tbl.WriteOut(out) 536 } 537 538 func (s *Uint8MemoTable) WriteOutSubsetLE(start int, out []byte) { 539 s.tbl.WriteOutSubset(start, out) 540 } 541 542 // Get returns the index of the requested value in the hash table or KeyNotFound 543 // along with a boolean indicating if it was found or not. 544 func (s *Uint8MemoTable) Get(val interface{}) (int, bool) { 545 546 h := hashInt(uint64(val.(uint8)), 0) 547 if e, ok := s.tbl.Lookup(h, func(v uint8) bool { return val.(uint8) == v }); ok { 548 return int(e.payload.memoIdx), ok 549 } 550 return KeyNotFound, false 551 } 552 553 // GetOrInsert will return the index of the specified value in the table, or insert the 554 // value into the table and return the new index. found indicates whether or not it already 555 // existed in the table (true) or was inserted by this call (false). 556 func (s *Uint8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 557 558 h := hashInt(uint64(val.(uint8)), 0) 559 e, ok := s.tbl.Lookup(h, func(v uint8) bool { 560 return val.(uint8) == v 561 }) 562 563 if ok { 564 idx = int(e.payload.memoIdx) 565 found = true 566 } else { 567 idx = s.Size() 568 s.tbl.Insert(e, h, val.(uint8), int32(idx)) 569 } 570 return 571 } 572 573 type payloadInt16 struct { 574 val int16 575 memoIdx int32 576 } 577 578 type entryInt16 struct { 579 h uint64 580 payload payloadInt16 581 } 582 583 func (e entryInt16) Valid() bool { return e.h != sentinel } 584 585 // Int16HashTable is a hashtable specifically for int16 that 586 // is utilized with the MemoTable to generalize interactions for easier 587 // implementation of dictionaries without losing performance. 588 type Int16HashTable struct { 589 cap uint64 590 capMask uint64 591 size uint64 592 593 entries []entryInt16 594 } 595 596 // NewInt16HashTable returns a new hash table for int16 values 597 // initialized with the passed in capacity or 32 whichever is larger. 598 func NewInt16HashTable(cap uint64) *Int16HashTable { 599 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 600 ret := &Int16HashTable{cap: initCap, capMask: initCap - 1, size: 0} 601 ret.entries = make([]entryInt16, initCap) 602 return ret 603 } 604 605 // Reset drops all of the values in this hash table and re-initializes it 606 // with the specified initial capacity as if by calling New, but without having 607 // to reallocate the object. 608 func (h *Int16HashTable) Reset(cap uint64) { 609 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 610 h.capMask = h.cap - 1 611 h.size = 0 612 h.entries = make([]entryInt16, h.cap) 613 } 614 615 // CopyValues is used for copying the values out of the hash table into the 616 // passed in slice, in the order that they were first inserted 617 func (h *Int16HashTable) CopyValues(out []int16) { 618 h.CopyValuesSubset(0, out) 619 } 620 621 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 622 // with the value at start, in the order that they were inserted. 623 func (h *Int16HashTable) CopyValuesSubset(start int, out []int16) { 624 h.VisitEntries(func(e *entryInt16) { 625 idx := e.payload.memoIdx - int32(start) 626 if idx >= 0 { 627 out[idx] = e.payload.val 628 } 629 }) 630 } 631 632 func (h *Int16HashTable) WriteOut(out []byte) { 633 h.WriteOutSubset(0, out) 634 } 635 636 func (h *Int16HashTable) WriteOutSubset(start int, out []byte) { 637 data := arrow.Int16Traits.CastFromBytes(out) 638 h.VisitEntries(func(e *entryInt16) { 639 idx := e.payload.memoIdx - int32(start) 640 if idx >= 0 { 641 data[idx] = utils.ToLEInt16(e.payload.val) 642 } 643 }) 644 } 645 646 func (h *Int16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 647 648 func (Int16HashTable) fixHash(v uint64) uint64 { 649 if v == sentinel { 650 return 42 651 } 652 return v 653 } 654 655 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 656 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 657 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 658 func (h *Int16HashTable) Lookup(v uint64, cmp func(int16) bool) (*entryInt16, bool) { 659 idx, ok := h.lookup(v, h.capMask, cmp) 660 return &h.entries[idx], ok 661 } 662 663 func (h *Int16HashTable) lookup(v uint64, szMask uint64, cmp func(int16) bool) (uint64, bool) { 664 const perturbShift uint8 = 5 665 666 var ( 667 idx uint64 668 perturb uint64 669 e *entryInt16 670 ) 671 672 v = h.fixHash(v) 673 idx = v & szMask 674 perturb = (v >> uint64(perturbShift)) + 1 675 676 for { 677 e = &h.entries[idx] 678 if e.h == v && cmp(e.payload.val) { 679 return idx, true 680 } 681 682 if e.h == sentinel { 683 return idx, false 684 } 685 686 // perturbation logic inspired from CPython's set/dict object 687 // the goal is that all 64 bits of unmasked hash value eventually 688 // participate int he probing sequence, to minimize clustering 689 idx = (idx + perturb) & szMask 690 perturb = (perturb >> uint64(perturbShift)) + 1 691 } 692 } 693 694 func (h *Int16HashTable) upsize(newcap uint64) error { 695 newMask := newcap - 1 696 697 oldEntries := h.entries 698 h.entries = make([]entryInt16, newcap) 699 for _, e := range oldEntries { 700 if e.Valid() { 701 idx, _ := h.lookup(e.h, newMask, func(int16) bool { return false }) 702 h.entries[idx] = e 703 } 704 } 705 h.cap = newcap 706 h.capMask = newMask 707 return nil 708 } 709 710 // Insert updates the given entry with the provided hash value, payload value and memo index. 711 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 712 func (h *Int16HashTable) Insert(e *entryInt16, v uint64, val int16, memoIdx int32) error { 713 e.h = h.fixHash(v) 714 e.payload.val = val 715 e.payload.memoIdx = memoIdx 716 h.size++ 717 718 if h.needUpsize() { 719 h.upsize(h.cap * uint64(loadFactor) * 2) 720 } 721 return nil 722 } 723 724 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 725 // a valid entry being one which has had a value inserted into it. 726 func (h *Int16HashTable) VisitEntries(visit func(*entryInt16)) { 727 for _, e := range h.entries { 728 if e.Valid() { 729 visit(&e) 730 } 731 } 732 } 733 734 // Int16MemoTable is a wrapper over the appropriate hashtable to provide an interface 735 // conforming to the MemoTable interface defined in the encoding package for general interactions 736 // regarding dictionaries. 737 type Int16MemoTable struct { 738 tbl *Int16HashTable 739 nullIdx int32 740 } 741 742 // NewInt16MemoTable returns a new memotable with num entries pre-allocated to reduce further 743 // allocations when inserting. 744 func NewInt16MemoTable(num int64) *Int16MemoTable { 745 return &Int16MemoTable{tbl: NewInt16HashTable(uint64(num)), nullIdx: KeyNotFound} 746 } 747 748 func (Int16MemoTable) TypeTraits() TypeTraits { 749 return arrow.Int16Traits 750 } 751 752 // Reset allows this table to be re-used by dumping all the data currently in the table. 753 func (s *Int16MemoTable) Reset() { 754 s.tbl.Reset(32) 755 s.nullIdx = KeyNotFound 756 } 757 758 // Size returns the current number of inserted elements into the table including if a null 759 // has been inserted. 760 func (s *Int16MemoTable) Size() int { 761 sz := int(s.tbl.size) 762 if _, ok := s.GetNull(); ok { 763 sz++ 764 } 765 return sz 766 } 767 768 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 769 // that will be true if found and false if not. 770 func (s *Int16MemoTable) GetNull() (int, bool) { 771 return int(s.nullIdx), s.nullIdx != KeyNotFound 772 } 773 774 // GetOrInsertNull will return the index of the null entry or insert a null entry 775 // if one currently doesn't exist. The found value will be true if there was already 776 // a null in the table, and false if it inserted one. 777 func (s *Int16MemoTable) GetOrInsertNull() (idx int, found bool) { 778 idx, found = s.GetNull() 779 if !found { 780 idx = s.Size() 781 s.nullIdx = int32(idx) 782 } 783 return 784 } 785 786 // CopyValues will copy the values from the memo table out into the passed in slice 787 // which must be of the appropriate type. 788 func (s *Int16MemoTable) CopyValues(out interface{}) { 789 s.CopyValuesSubset(0, out) 790 } 791 792 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 793 // at the provided start index 794 func (s *Int16MemoTable) CopyValuesSubset(start int, out interface{}) { 795 s.tbl.CopyValuesSubset(start, out.([]int16)) 796 } 797 798 func (s *Int16MemoTable) WriteOut(out []byte) { 799 s.tbl.CopyValues(arrow.Int16Traits.CastFromBytes(out)) 800 } 801 802 func (s *Int16MemoTable) WriteOutSubset(start int, out []byte) { 803 s.tbl.CopyValuesSubset(start, arrow.Int16Traits.CastFromBytes(out)) 804 } 805 806 func (s *Int16MemoTable) WriteOutLE(out []byte) { 807 s.tbl.WriteOut(out) 808 } 809 810 func (s *Int16MemoTable) WriteOutSubsetLE(start int, out []byte) { 811 s.tbl.WriteOutSubset(start, out) 812 } 813 814 // Get returns the index of the requested value in the hash table or KeyNotFound 815 // along with a boolean indicating if it was found or not. 816 func (s *Int16MemoTable) Get(val interface{}) (int, bool) { 817 818 h := hashInt(uint64(val.(int16)), 0) 819 if e, ok := s.tbl.Lookup(h, func(v int16) bool { return val.(int16) == v }); ok { 820 return int(e.payload.memoIdx), ok 821 } 822 return KeyNotFound, false 823 } 824 825 // GetOrInsert will return the index of the specified value in the table, or insert the 826 // value into the table and return the new index. found indicates whether or not it already 827 // existed in the table (true) or was inserted by this call (false). 828 func (s *Int16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 829 830 h := hashInt(uint64(val.(int16)), 0) 831 e, ok := s.tbl.Lookup(h, func(v int16) bool { 832 return val.(int16) == v 833 }) 834 835 if ok { 836 idx = int(e.payload.memoIdx) 837 found = true 838 } else { 839 idx = s.Size() 840 s.tbl.Insert(e, h, val.(int16), int32(idx)) 841 } 842 return 843 } 844 845 type payloadUint16 struct { 846 val uint16 847 memoIdx int32 848 } 849 850 type entryUint16 struct { 851 h uint64 852 payload payloadUint16 853 } 854 855 func (e entryUint16) Valid() bool { return e.h != sentinel } 856 857 // Uint16HashTable is a hashtable specifically for uint16 that 858 // is utilized with the MemoTable to generalize interactions for easier 859 // implementation of dictionaries without losing performance. 860 type Uint16HashTable struct { 861 cap uint64 862 capMask uint64 863 size uint64 864 865 entries []entryUint16 866 } 867 868 // NewUint16HashTable returns a new hash table for uint16 values 869 // initialized with the passed in capacity or 32 whichever is larger. 870 func NewUint16HashTable(cap uint64) *Uint16HashTable { 871 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 872 ret := &Uint16HashTable{cap: initCap, capMask: initCap - 1, size: 0} 873 ret.entries = make([]entryUint16, initCap) 874 return ret 875 } 876 877 // Reset drops all of the values in this hash table and re-initializes it 878 // with the specified initial capacity as if by calling New, but without having 879 // to reallocate the object. 880 func (h *Uint16HashTable) Reset(cap uint64) { 881 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 882 h.capMask = h.cap - 1 883 h.size = 0 884 h.entries = make([]entryUint16, h.cap) 885 } 886 887 // CopyValues is used for copying the values out of the hash table into the 888 // passed in slice, in the order that they were first inserted 889 func (h *Uint16HashTable) CopyValues(out []uint16) { 890 h.CopyValuesSubset(0, out) 891 } 892 893 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 894 // with the value at start, in the order that they were inserted. 895 func (h *Uint16HashTable) CopyValuesSubset(start int, out []uint16) { 896 h.VisitEntries(func(e *entryUint16) { 897 idx := e.payload.memoIdx - int32(start) 898 if idx >= 0 { 899 out[idx] = e.payload.val 900 } 901 }) 902 } 903 904 func (h *Uint16HashTable) WriteOut(out []byte) { 905 h.WriteOutSubset(0, out) 906 } 907 908 func (h *Uint16HashTable) WriteOutSubset(start int, out []byte) { 909 data := arrow.Uint16Traits.CastFromBytes(out) 910 h.VisitEntries(func(e *entryUint16) { 911 idx := e.payload.memoIdx - int32(start) 912 if idx >= 0 { 913 data[idx] = utils.ToLEUint16(e.payload.val) 914 } 915 }) 916 } 917 918 func (h *Uint16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 919 920 func (Uint16HashTable) fixHash(v uint64) uint64 { 921 if v == sentinel { 922 return 42 923 } 924 return v 925 } 926 927 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 928 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 929 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 930 func (h *Uint16HashTable) Lookup(v uint64, cmp func(uint16) bool) (*entryUint16, bool) { 931 idx, ok := h.lookup(v, h.capMask, cmp) 932 return &h.entries[idx], ok 933 } 934 935 func (h *Uint16HashTable) lookup(v uint64, szMask uint64, cmp func(uint16) bool) (uint64, bool) { 936 const perturbShift uint8 = 5 937 938 var ( 939 idx uint64 940 perturb uint64 941 e *entryUint16 942 ) 943 944 v = h.fixHash(v) 945 idx = v & szMask 946 perturb = (v >> uint64(perturbShift)) + 1 947 948 for { 949 e = &h.entries[idx] 950 if e.h == v && cmp(e.payload.val) { 951 return idx, true 952 } 953 954 if e.h == sentinel { 955 return idx, false 956 } 957 958 // perturbation logic inspired from CPython's set/dict object 959 // the goal is that all 64 bits of unmasked hash value eventually 960 // participate int he probing sequence, to minimize clustering 961 idx = (idx + perturb) & szMask 962 perturb = (perturb >> uint64(perturbShift)) + 1 963 } 964 } 965 966 func (h *Uint16HashTable) upsize(newcap uint64) error { 967 newMask := newcap - 1 968 969 oldEntries := h.entries 970 h.entries = make([]entryUint16, newcap) 971 for _, e := range oldEntries { 972 if e.Valid() { 973 idx, _ := h.lookup(e.h, newMask, func(uint16) bool { return false }) 974 h.entries[idx] = e 975 } 976 } 977 h.cap = newcap 978 h.capMask = newMask 979 return nil 980 } 981 982 // Insert updates the given entry with the provided hash value, payload value and memo index. 983 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 984 func (h *Uint16HashTable) Insert(e *entryUint16, v uint64, val uint16, memoIdx int32) error { 985 e.h = h.fixHash(v) 986 e.payload.val = val 987 e.payload.memoIdx = memoIdx 988 h.size++ 989 990 if h.needUpsize() { 991 h.upsize(h.cap * uint64(loadFactor) * 2) 992 } 993 return nil 994 } 995 996 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 997 // a valid entry being one which has had a value inserted into it. 998 func (h *Uint16HashTable) VisitEntries(visit func(*entryUint16)) { 999 for _, e := range h.entries { 1000 if e.Valid() { 1001 visit(&e) 1002 } 1003 } 1004 } 1005 1006 // Uint16MemoTable is a wrapper over the appropriate hashtable to provide an interface 1007 // conforming to the MemoTable interface defined in the encoding package for general interactions 1008 // regarding dictionaries. 1009 type Uint16MemoTable struct { 1010 tbl *Uint16HashTable 1011 nullIdx int32 1012 } 1013 1014 // NewUint16MemoTable returns a new memotable with num entries pre-allocated to reduce further 1015 // allocations when inserting. 1016 func NewUint16MemoTable(num int64) *Uint16MemoTable { 1017 return &Uint16MemoTable{tbl: NewUint16HashTable(uint64(num)), nullIdx: KeyNotFound} 1018 } 1019 1020 func (Uint16MemoTable) TypeTraits() TypeTraits { 1021 return arrow.Uint16Traits 1022 } 1023 1024 // Reset allows this table to be re-used by dumping all the data currently in the table. 1025 func (s *Uint16MemoTable) Reset() { 1026 s.tbl.Reset(32) 1027 s.nullIdx = KeyNotFound 1028 } 1029 1030 // Size returns the current number of inserted elements into the table including if a null 1031 // has been inserted. 1032 func (s *Uint16MemoTable) Size() int { 1033 sz := int(s.tbl.size) 1034 if _, ok := s.GetNull(); ok { 1035 sz++ 1036 } 1037 return sz 1038 } 1039 1040 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 1041 // that will be true if found and false if not. 1042 func (s *Uint16MemoTable) GetNull() (int, bool) { 1043 return int(s.nullIdx), s.nullIdx != KeyNotFound 1044 } 1045 1046 // GetOrInsertNull will return the index of the null entry or insert a null entry 1047 // if one currently doesn't exist. The found value will be true if there was already 1048 // a null in the table, and false if it inserted one. 1049 func (s *Uint16MemoTable) GetOrInsertNull() (idx int, found bool) { 1050 idx, found = s.GetNull() 1051 if !found { 1052 idx = s.Size() 1053 s.nullIdx = int32(idx) 1054 } 1055 return 1056 } 1057 1058 // CopyValues will copy the values from the memo table out into the passed in slice 1059 // which must be of the appropriate type. 1060 func (s *Uint16MemoTable) CopyValues(out interface{}) { 1061 s.CopyValuesSubset(0, out) 1062 } 1063 1064 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 1065 // at the provided start index 1066 func (s *Uint16MemoTable) CopyValuesSubset(start int, out interface{}) { 1067 s.tbl.CopyValuesSubset(start, out.([]uint16)) 1068 } 1069 1070 func (s *Uint16MemoTable) WriteOut(out []byte) { 1071 s.tbl.CopyValues(arrow.Uint16Traits.CastFromBytes(out)) 1072 } 1073 1074 func (s *Uint16MemoTable) WriteOutSubset(start int, out []byte) { 1075 s.tbl.CopyValuesSubset(start, arrow.Uint16Traits.CastFromBytes(out)) 1076 } 1077 1078 func (s *Uint16MemoTable) WriteOutLE(out []byte) { 1079 s.tbl.WriteOut(out) 1080 } 1081 1082 func (s *Uint16MemoTable) WriteOutSubsetLE(start int, out []byte) { 1083 s.tbl.WriteOutSubset(start, out) 1084 } 1085 1086 // Get returns the index of the requested value in the hash table or KeyNotFound 1087 // along with a boolean indicating if it was found or not. 1088 func (s *Uint16MemoTable) Get(val interface{}) (int, bool) { 1089 1090 h := hashInt(uint64(val.(uint16)), 0) 1091 if e, ok := s.tbl.Lookup(h, func(v uint16) bool { return val.(uint16) == v }); ok { 1092 return int(e.payload.memoIdx), ok 1093 } 1094 return KeyNotFound, false 1095 } 1096 1097 // GetOrInsert will return the index of the specified value in the table, or insert the 1098 // value into the table and return the new index. found indicates whether or not it already 1099 // existed in the table (true) or was inserted by this call (false). 1100 func (s *Uint16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 1101 1102 h := hashInt(uint64(val.(uint16)), 0) 1103 e, ok := s.tbl.Lookup(h, func(v uint16) bool { 1104 return val.(uint16) == v 1105 }) 1106 1107 if ok { 1108 idx = int(e.payload.memoIdx) 1109 found = true 1110 } else { 1111 idx = s.Size() 1112 s.tbl.Insert(e, h, val.(uint16), int32(idx)) 1113 } 1114 return 1115 } 1116 1117 type payloadInt32 struct { 1118 val int32 1119 memoIdx int32 1120 } 1121 1122 type entryInt32 struct { 1123 h uint64 1124 payload payloadInt32 1125 } 1126 1127 func (e entryInt32) Valid() bool { return e.h != sentinel } 1128 1129 // Int32HashTable is a hashtable specifically for int32 that 1130 // is utilized with the MemoTable to generalize interactions for easier 1131 // implementation of dictionaries without losing performance. 1132 type Int32HashTable struct { 1133 cap uint64 1134 capMask uint64 1135 size uint64 1136 1137 entries []entryInt32 1138 } 1139 1140 // NewInt32HashTable returns a new hash table for int32 values 1141 // initialized with the passed in capacity or 32 whichever is larger. 1142 func NewInt32HashTable(cap uint64) *Int32HashTable { 1143 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1144 ret := &Int32HashTable{cap: initCap, capMask: initCap - 1, size: 0} 1145 ret.entries = make([]entryInt32, initCap) 1146 return ret 1147 } 1148 1149 // Reset drops all of the values in this hash table and re-initializes it 1150 // with the specified initial capacity as if by calling New, but without having 1151 // to reallocate the object. 1152 func (h *Int32HashTable) Reset(cap uint64) { 1153 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1154 h.capMask = h.cap - 1 1155 h.size = 0 1156 h.entries = make([]entryInt32, h.cap) 1157 } 1158 1159 // CopyValues is used for copying the values out of the hash table into the 1160 // passed in slice, in the order that they were first inserted 1161 func (h *Int32HashTable) CopyValues(out []int32) { 1162 h.CopyValuesSubset(0, out) 1163 } 1164 1165 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 1166 // with the value at start, in the order that they were inserted. 1167 func (h *Int32HashTable) CopyValuesSubset(start int, out []int32) { 1168 h.VisitEntries(func(e *entryInt32) { 1169 idx := e.payload.memoIdx - int32(start) 1170 if idx >= 0 { 1171 out[idx] = e.payload.val 1172 } 1173 }) 1174 } 1175 1176 func (h *Int32HashTable) WriteOut(out []byte) { 1177 h.WriteOutSubset(0, out) 1178 } 1179 1180 func (h *Int32HashTable) WriteOutSubset(start int, out []byte) { 1181 data := arrow.Int32Traits.CastFromBytes(out) 1182 h.VisitEntries(func(e *entryInt32) { 1183 idx := e.payload.memoIdx - int32(start) 1184 if idx >= 0 { 1185 data[idx] = utils.ToLEInt32(e.payload.val) 1186 } 1187 }) 1188 } 1189 1190 func (h *Int32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 1191 1192 func (Int32HashTable) fixHash(v uint64) uint64 { 1193 if v == sentinel { 1194 return 42 1195 } 1196 return v 1197 } 1198 1199 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 1200 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 1201 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 1202 func (h *Int32HashTable) Lookup(v uint64, cmp func(int32) bool) (*entryInt32, bool) { 1203 idx, ok := h.lookup(v, h.capMask, cmp) 1204 return &h.entries[idx], ok 1205 } 1206 1207 func (h *Int32HashTable) lookup(v uint64, szMask uint64, cmp func(int32) bool) (uint64, bool) { 1208 const perturbShift uint8 = 5 1209 1210 var ( 1211 idx uint64 1212 perturb uint64 1213 e *entryInt32 1214 ) 1215 1216 v = h.fixHash(v) 1217 idx = v & szMask 1218 perturb = (v >> uint64(perturbShift)) + 1 1219 1220 for { 1221 e = &h.entries[idx] 1222 if e.h == v && cmp(e.payload.val) { 1223 return idx, true 1224 } 1225 1226 if e.h == sentinel { 1227 return idx, false 1228 } 1229 1230 // perturbation logic inspired from CPython's set/dict object 1231 // the goal is that all 64 bits of unmasked hash value eventually 1232 // participate int he probing sequence, to minimize clustering 1233 idx = (idx + perturb) & szMask 1234 perturb = (perturb >> uint64(perturbShift)) + 1 1235 } 1236 } 1237 1238 func (h *Int32HashTable) upsize(newcap uint64) error { 1239 newMask := newcap - 1 1240 1241 oldEntries := h.entries 1242 h.entries = make([]entryInt32, newcap) 1243 for _, e := range oldEntries { 1244 if e.Valid() { 1245 idx, _ := h.lookup(e.h, newMask, func(int32) bool { return false }) 1246 h.entries[idx] = e 1247 } 1248 } 1249 h.cap = newcap 1250 h.capMask = newMask 1251 return nil 1252 } 1253 1254 // Insert updates the given entry with the provided hash value, payload value and memo index. 1255 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 1256 func (h *Int32HashTable) Insert(e *entryInt32, v uint64, val int32, memoIdx int32) error { 1257 e.h = h.fixHash(v) 1258 e.payload.val = val 1259 e.payload.memoIdx = memoIdx 1260 h.size++ 1261 1262 if h.needUpsize() { 1263 h.upsize(h.cap * uint64(loadFactor) * 2) 1264 } 1265 return nil 1266 } 1267 1268 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 1269 // a valid entry being one which has had a value inserted into it. 1270 func (h *Int32HashTable) VisitEntries(visit func(*entryInt32)) { 1271 for _, e := range h.entries { 1272 if e.Valid() { 1273 visit(&e) 1274 } 1275 } 1276 } 1277 1278 // Int32MemoTable is a wrapper over the appropriate hashtable to provide an interface 1279 // conforming to the MemoTable interface defined in the encoding package for general interactions 1280 // regarding dictionaries. 1281 type Int32MemoTable struct { 1282 tbl *Int32HashTable 1283 nullIdx int32 1284 } 1285 1286 // NewInt32MemoTable returns a new memotable with num entries pre-allocated to reduce further 1287 // allocations when inserting. 1288 func NewInt32MemoTable(num int64) *Int32MemoTable { 1289 return &Int32MemoTable{tbl: NewInt32HashTable(uint64(num)), nullIdx: KeyNotFound} 1290 } 1291 1292 func (Int32MemoTable) TypeTraits() TypeTraits { 1293 return arrow.Int32Traits 1294 } 1295 1296 // Reset allows this table to be re-used by dumping all the data currently in the table. 1297 func (s *Int32MemoTable) Reset() { 1298 s.tbl.Reset(32) 1299 s.nullIdx = KeyNotFound 1300 } 1301 1302 // Size returns the current number of inserted elements into the table including if a null 1303 // has been inserted. 1304 func (s *Int32MemoTable) Size() int { 1305 sz := int(s.tbl.size) 1306 if _, ok := s.GetNull(); ok { 1307 sz++ 1308 } 1309 return sz 1310 } 1311 1312 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 1313 // that will be true if found and false if not. 1314 func (s *Int32MemoTable) GetNull() (int, bool) { 1315 return int(s.nullIdx), s.nullIdx != KeyNotFound 1316 } 1317 1318 // GetOrInsertNull will return the index of the null entry or insert a null entry 1319 // if one currently doesn't exist. The found value will be true if there was already 1320 // a null in the table, and false if it inserted one. 1321 func (s *Int32MemoTable) GetOrInsertNull() (idx int, found bool) { 1322 idx, found = s.GetNull() 1323 if !found { 1324 idx = s.Size() 1325 s.nullIdx = int32(idx) 1326 } 1327 return 1328 } 1329 1330 // CopyValues will copy the values from the memo table out into the passed in slice 1331 // which must be of the appropriate type. 1332 func (s *Int32MemoTable) CopyValues(out interface{}) { 1333 s.CopyValuesSubset(0, out) 1334 } 1335 1336 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 1337 // at the provided start index 1338 func (s *Int32MemoTable) CopyValuesSubset(start int, out interface{}) { 1339 s.tbl.CopyValuesSubset(start, out.([]int32)) 1340 } 1341 1342 func (s *Int32MemoTable) WriteOut(out []byte) { 1343 s.tbl.CopyValues(arrow.Int32Traits.CastFromBytes(out)) 1344 } 1345 1346 func (s *Int32MemoTable) WriteOutSubset(start int, out []byte) { 1347 s.tbl.CopyValuesSubset(start, arrow.Int32Traits.CastFromBytes(out)) 1348 } 1349 1350 func (s *Int32MemoTable) WriteOutLE(out []byte) { 1351 s.tbl.WriteOut(out) 1352 } 1353 1354 func (s *Int32MemoTable) WriteOutSubsetLE(start int, out []byte) { 1355 s.tbl.WriteOutSubset(start, out) 1356 } 1357 1358 // Get returns the index of the requested value in the hash table or KeyNotFound 1359 // along with a boolean indicating if it was found or not. 1360 func (s *Int32MemoTable) Get(val interface{}) (int, bool) { 1361 1362 h := hashInt(uint64(val.(int32)), 0) 1363 if e, ok := s.tbl.Lookup(h, func(v int32) bool { return val.(int32) == v }); ok { 1364 return int(e.payload.memoIdx), ok 1365 } 1366 return KeyNotFound, false 1367 } 1368 1369 // GetOrInsert will return the index of the specified value in the table, or insert the 1370 // value into the table and return the new index. found indicates whether or not it already 1371 // existed in the table (true) or was inserted by this call (false). 1372 func (s *Int32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 1373 1374 h := hashInt(uint64(val.(int32)), 0) 1375 e, ok := s.tbl.Lookup(h, func(v int32) bool { 1376 return val.(int32) == v 1377 }) 1378 1379 if ok { 1380 idx = int(e.payload.memoIdx) 1381 found = true 1382 } else { 1383 idx = s.Size() 1384 s.tbl.Insert(e, h, val.(int32), int32(idx)) 1385 } 1386 return 1387 } 1388 1389 type payloadInt64 struct { 1390 val int64 1391 memoIdx int32 1392 } 1393 1394 type entryInt64 struct { 1395 h uint64 1396 payload payloadInt64 1397 } 1398 1399 func (e entryInt64) Valid() bool { return e.h != sentinel } 1400 1401 // Int64HashTable is a hashtable specifically for int64 that 1402 // is utilized with the MemoTable to generalize interactions for easier 1403 // implementation of dictionaries without losing performance. 1404 type Int64HashTable struct { 1405 cap uint64 1406 capMask uint64 1407 size uint64 1408 1409 entries []entryInt64 1410 } 1411 1412 // NewInt64HashTable returns a new hash table for int64 values 1413 // initialized with the passed in capacity or 32 whichever is larger. 1414 func NewInt64HashTable(cap uint64) *Int64HashTable { 1415 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1416 ret := &Int64HashTable{cap: initCap, capMask: initCap - 1, size: 0} 1417 ret.entries = make([]entryInt64, initCap) 1418 return ret 1419 } 1420 1421 // Reset drops all of the values in this hash table and re-initializes it 1422 // with the specified initial capacity as if by calling New, but without having 1423 // to reallocate the object. 1424 func (h *Int64HashTable) Reset(cap uint64) { 1425 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1426 h.capMask = h.cap - 1 1427 h.size = 0 1428 h.entries = make([]entryInt64, h.cap) 1429 } 1430 1431 // CopyValues is used for copying the values out of the hash table into the 1432 // passed in slice, in the order that they were first inserted 1433 func (h *Int64HashTable) CopyValues(out []int64) { 1434 h.CopyValuesSubset(0, out) 1435 } 1436 1437 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 1438 // with the value at start, in the order that they were inserted. 1439 func (h *Int64HashTable) CopyValuesSubset(start int, out []int64) { 1440 h.VisitEntries(func(e *entryInt64) { 1441 idx := e.payload.memoIdx - int32(start) 1442 if idx >= 0 { 1443 out[idx] = e.payload.val 1444 } 1445 }) 1446 } 1447 1448 func (h *Int64HashTable) WriteOut(out []byte) { 1449 h.WriteOutSubset(0, out) 1450 } 1451 1452 func (h *Int64HashTable) WriteOutSubset(start int, out []byte) { 1453 data := arrow.Int64Traits.CastFromBytes(out) 1454 h.VisitEntries(func(e *entryInt64) { 1455 idx := e.payload.memoIdx - int32(start) 1456 if idx >= 0 { 1457 data[idx] = utils.ToLEInt64(e.payload.val) 1458 } 1459 }) 1460 } 1461 1462 func (h *Int64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 1463 1464 func (Int64HashTable) fixHash(v uint64) uint64 { 1465 if v == sentinel { 1466 return 42 1467 } 1468 return v 1469 } 1470 1471 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 1472 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 1473 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 1474 func (h *Int64HashTable) Lookup(v uint64, cmp func(int64) bool) (*entryInt64, bool) { 1475 idx, ok := h.lookup(v, h.capMask, cmp) 1476 return &h.entries[idx], ok 1477 } 1478 1479 func (h *Int64HashTable) lookup(v uint64, szMask uint64, cmp func(int64) bool) (uint64, bool) { 1480 const perturbShift uint8 = 5 1481 1482 var ( 1483 idx uint64 1484 perturb uint64 1485 e *entryInt64 1486 ) 1487 1488 v = h.fixHash(v) 1489 idx = v & szMask 1490 perturb = (v >> uint64(perturbShift)) + 1 1491 1492 for { 1493 e = &h.entries[idx] 1494 if e.h == v && cmp(e.payload.val) { 1495 return idx, true 1496 } 1497 1498 if e.h == sentinel { 1499 return idx, false 1500 } 1501 1502 // perturbation logic inspired from CPython's set/dict object 1503 // the goal is that all 64 bits of unmasked hash value eventually 1504 // participate int he probing sequence, to minimize clustering 1505 idx = (idx + perturb) & szMask 1506 perturb = (perturb >> uint64(perturbShift)) + 1 1507 } 1508 } 1509 1510 func (h *Int64HashTable) upsize(newcap uint64) error { 1511 newMask := newcap - 1 1512 1513 oldEntries := h.entries 1514 h.entries = make([]entryInt64, newcap) 1515 for _, e := range oldEntries { 1516 if e.Valid() { 1517 idx, _ := h.lookup(e.h, newMask, func(int64) bool { return false }) 1518 h.entries[idx] = e 1519 } 1520 } 1521 h.cap = newcap 1522 h.capMask = newMask 1523 return nil 1524 } 1525 1526 // Insert updates the given entry with the provided hash value, payload value and memo index. 1527 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 1528 func (h *Int64HashTable) Insert(e *entryInt64, v uint64, val int64, memoIdx int32) error { 1529 e.h = h.fixHash(v) 1530 e.payload.val = val 1531 e.payload.memoIdx = memoIdx 1532 h.size++ 1533 1534 if h.needUpsize() { 1535 h.upsize(h.cap * uint64(loadFactor) * 2) 1536 } 1537 return nil 1538 } 1539 1540 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 1541 // a valid entry being one which has had a value inserted into it. 1542 func (h *Int64HashTable) VisitEntries(visit func(*entryInt64)) { 1543 for _, e := range h.entries { 1544 if e.Valid() { 1545 visit(&e) 1546 } 1547 } 1548 } 1549 1550 // Int64MemoTable is a wrapper over the appropriate hashtable to provide an interface 1551 // conforming to the MemoTable interface defined in the encoding package for general interactions 1552 // regarding dictionaries. 1553 type Int64MemoTable struct { 1554 tbl *Int64HashTable 1555 nullIdx int32 1556 } 1557 1558 // NewInt64MemoTable returns a new memotable with num entries pre-allocated to reduce further 1559 // allocations when inserting. 1560 func NewInt64MemoTable(num int64) *Int64MemoTable { 1561 return &Int64MemoTable{tbl: NewInt64HashTable(uint64(num)), nullIdx: KeyNotFound} 1562 } 1563 1564 func (Int64MemoTable) TypeTraits() TypeTraits { 1565 return arrow.Int64Traits 1566 } 1567 1568 // Reset allows this table to be re-used by dumping all the data currently in the table. 1569 func (s *Int64MemoTable) Reset() { 1570 s.tbl.Reset(32) 1571 s.nullIdx = KeyNotFound 1572 } 1573 1574 // Size returns the current number of inserted elements into the table including if a null 1575 // has been inserted. 1576 func (s *Int64MemoTable) Size() int { 1577 sz := int(s.tbl.size) 1578 if _, ok := s.GetNull(); ok { 1579 sz++ 1580 } 1581 return sz 1582 } 1583 1584 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 1585 // that will be true if found and false if not. 1586 func (s *Int64MemoTable) GetNull() (int, bool) { 1587 return int(s.nullIdx), s.nullIdx != KeyNotFound 1588 } 1589 1590 // GetOrInsertNull will return the index of the null entry or insert a null entry 1591 // if one currently doesn't exist. The found value will be true if there was already 1592 // a null in the table, and false if it inserted one. 1593 func (s *Int64MemoTable) GetOrInsertNull() (idx int, found bool) { 1594 idx, found = s.GetNull() 1595 if !found { 1596 idx = s.Size() 1597 s.nullIdx = int32(idx) 1598 } 1599 return 1600 } 1601 1602 // CopyValues will copy the values from the memo table out into the passed in slice 1603 // which must be of the appropriate type. 1604 func (s *Int64MemoTable) CopyValues(out interface{}) { 1605 s.CopyValuesSubset(0, out) 1606 } 1607 1608 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 1609 // at the provided start index 1610 func (s *Int64MemoTable) CopyValuesSubset(start int, out interface{}) { 1611 s.tbl.CopyValuesSubset(start, out.([]int64)) 1612 } 1613 1614 func (s *Int64MemoTable) WriteOut(out []byte) { 1615 s.tbl.CopyValues(arrow.Int64Traits.CastFromBytes(out)) 1616 } 1617 1618 func (s *Int64MemoTable) WriteOutSubset(start int, out []byte) { 1619 s.tbl.CopyValuesSubset(start, arrow.Int64Traits.CastFromBytes(out)) 1620 } 1621 1622 func (s *Int64MemoTable) WriteOutLE(out []byte) { 1623 s.tbl.WriteOut(out) 1624 } 1625 1626 func (s *Int64MemoTable) WriteOutSubsetLE(start int, out []byte) { 1627 s.tbl.WriteOutSubset(start, out) 1628 } 1629 1630 // Get returns the index of the requested value in the hash table or KeyNotFound 1631 // along with a boolean indicating if it was found or not. 1632 func (s *Int64MemoTable) Get(val interface{}) (int, bool) { 1633 1634 h := hashInt(uint64(val.(int64)), 0) 1635 if e, ok := s.tbl.Lookup(h, func(v int64) bool { return val.(int64) == v }); ok { 1636 return int(e.payload.memoIdx), ok 1637 } 1638 return KeyNotFound, false 1639 } 1640 1641 // GetOrInsert will return the index of the specified value in the table, or insert the 1642 // value into the table and return the new index. found indicates whether or not it already 1643 // existed in the table (true) or was inserted by this call (false). 1644 func (s *Int64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 1645 1646 h := hashInt(uint64(val.(int64)), 0) 1647 e, ok := s.tbl.Lookup(h, func(v int64) bool { 1648 return val.(int64) == v 1649 }) 1650 1651 if ok { 1652 idx = int(e.payload.memoIdx) 1653 found = true 1654 } else { 1655 idx = s.Size() 1656 s.tbl.Insert(e, h, val.(int64), int32(idx)) 1657 } 1658 return 1659 } 1660 1661 type payloadUint32 struct { 1662 val uint32 1663 memoIdx int32 1664 } 1665 1666 type entryUint32 struct { 1667 h uint64 1668 payload payloadUint32 1669 } 1670 1671 func (e entryUint32) Valid() bool { return e.h != sentinel } 1672 1673 // Uint32HashTable is a hashtable specifically for uint32 that 1674 // is utilized with the MemoTable to generalize interactions for easier 1675 // implementation of dictionaries without losing performance. 1676 type Uint32HashTable struct { 1677 cap uint64 1678 capMask uint64 1679 size uint64 1680 1681 entries []entryUint32 1682 } 1683 1684 // NewUint32HashTable returns a new hash table for uint32 values 1685 // initialized with the passed in capacity or 32 whichever is larger. 1686 func NewUint32HashTable(cap uint64) *Uint32HashTable { 1687 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1688 ret := &Uint32HashTable{cap: initCap, capMask: initCap - 1, size: 0} 1689 ret.entries = make([]entryUint32, initCap) 1690 return ret 1691 } 1692 1693 // Reset drops all of the values in this hash table and re-initializes it 1694 // with the specified initial capacity as if by calling New, but without having 1695 // to reallocate the object. 1696 func (h *Uint32HashTable) Reset(cap uint64) { 1697 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1698 h.capMask = h.cap - 1 1699 h.size = 0 1700 h.entries = make([]entryUint32, h.cap) 1701 } 1702 1703 // CopyValues is used for copying the values out of the hash table into the 1704 // passed in slice, in the order that they were first inserted 1705 func (h *Uint32HashTable) CopyValues(out []uint32) { 1706 h.CopyValuesSubset(0, out) 1707 } 1708 1709 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 1710 // with the value at start, in the order that they were inserted. 1711 func (h *Uint32HashTable) CopyValuesSubset(start int, out []uint32) { 1712 h.VisitEntries(func(e *entryUint32) { 1713 idx := e.payload.memoIdx - int32(start) 1714 if idx >= 0 { 1715 out[idx] = e.payload.val 1716 } 1717 }) 1718 } 1719 1720 func (h *Uint32HashTable) WriteOut(out []byte) { 1721 h.WriteOutSubset(0, out) 1722 } 1723 1724 func (h *Uint32HashTable) WriteOutSubset(start int, out []byte) { 1725 data := arrow.Uint32Traits.CastFromBytes(out) 1726 h.VisitEntries(func(e *entryUint32) { 1727 idx := e.payload.memoIdx - int32(start) 1728 if idx >= 0 { 1729 data[idx] = utils.ToLEUint32(e.payload.val) 1730 } 1731 }) 1732 } 1733 1734 func (h *Uint32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 1735 1736 func (Uint32HashTable) fixHash(v uint64) uint64 { 1737 if v == sentinel { 1738 return 42 1739 } 1740 return v 1741 } 1742 1743 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 1744 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 1745 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 1746 func (h *Uint32HashTable) Lookup(v uint64, cmp func(uint32) bool) (*entryUint32, bool) { 1747 idx, ok := h.lookup(v, h.capMask, cmp) 1748 return &h.entries[idx], ok 1749 } 1750 1751 func (h *Uint32HashTable) lookup(v uint64, szMask uint64, cmp func(uint32) bool) (uint64, bool) { 1752 const perturbShift uint8 = 5 1753 1754 var ( 1755 idx uint64 1756 perturb uint64 1757 e *entryUint32 1758 ) 1759 1760 v = h.fixHash(v) 1761 idx = v & szMask 1762 perturb = (v >> uint64(perturbShift)) + 1 1763 1764 for { 1765 e = &h.entries[idx] 1766 if e.h == v && cmp(e.payload.val) { 1767 return idx, true 1768 } 1769 1770 if e.h == sentinel { 1771 return idx, false 1772 } 1773 1774 // perturbation logic inspired from CPython's set/dict object 1775 // the goal is that all 64 bits of unmasked hash value eventually 1776 // participate int he probing sequence, to minimize clustering 1777 idx = (idx + perturb) & szMask 1778 perturb = (perturb >> uint64(perturbShift)) + 1 1779 } 1780 } 1781 1782 func (h *Uint32HashTable) upsize(newcap uint64) error { 1783 newMask := newcap - 1 1784 1785 oldEntries := h.entries 1786 h.entries = make([]entryUint32, newcap) 1787 for _, e := range oldEntries { 1788 if e.Valid() { 1789 idx, _ := h.lookup(e.h, newMask, func(uint32) bool { return false }) 1790 h.entries[idx] = e 1791 } 1792 } 1793 h.cap = newcap 1794 h.capMask = newMask 1795 return nil 1796 } 1797 1798 // Insert updates the given entry with the provided hash value, payload value and memo index. 1799 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 1800 func (h *Uint32HashTable) Insert(e *entryUint32, v uint64, val uint32, memoIdx int32) error { 1801 e.h = h.fixHash(v) 1802 e.payload.val = val 1803 e.payload.memoIdx = memoIdx 1804 h.size++ 1805 1806 if h.needUpsize() { 1807 h.upsize(h.cap * uint64(loadFactor) * 2) 1808 } 1809 return nil 1810 } 1811 1812 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 1813 // a valid entry being one which has had a value inserted into it. 1814 func (h *Uint32HashTable) VisitEntries(visit func(*entryUint32)) { 1815 for _, e := range h.entries { 1816 if e.Valid() { 1817 visit(&e) 1818 } 1819 } 1820 } 1821 1822 // Uint32MemoTable is a wrapper over the appropriate hashtable to provide an interface 1823 // conforming to the MemoTable interface defined in the encoding package for general interactions 1824 // regarding dictionaries. 1825 type Uint32MemoTable struct { 1826 tbl *Uint32HashTable 1827 nullIdx int32 1828 } 1829 1830 // NewUint32MemoTable returns a new memotable with num entries pre-allocated to reduce further 1831 // allocations when inserting. 1832 func NewUint32MemoTable(num int64) *Uint32MemoTable { 1833 return &Uint32MemoTable{tbl: NewUint32HashTable(uint64(num)), nullIdx: KeyNotFound} 1834 } 1835 1836 func (Uint32MemoTable) TypeTraits() TypeTraits { 1837 return arrow.Uint32Traits 1838 } 1839 1840 // Reset allows this table to be re-used by dumping all the data currently in the table. 1841 func (s *Uint32MemoTable) Reset() { 1842 s.tbl.Reset(32) 1843 s.nullIdx = KeyNotFound 1844 } 1845 1846 // Size returns the current number of inserted elements into the table including if a null 1847 // has been inserted. 1848 func (s *Uint32MemoTable) Size() int { 1849 sz := int(s.tbl.size) 1850 if _, ok := s.GetNull(); ok { 1851 sz++ 1852 } 1853 return sz 1854 } 1855 1856 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 1857 // that will be true if found and false if not. 1858 func (s *Uint32MemoTable) GetNull() (int, bool) { 1859 return int(s.nullIdx), s.nullIdx != KeyNotFound 1860 } 1861 1862 // GetOrInsertNull will return the index of the null entry or insert a null entry 1863 // if one currently doesn't exist. The found value will be true if there was already 1864 // a null in the table, and false if it inserted one. 1865 func (s *Uint32MemoTable) GetOrInsertNull() (idx int, found bool) { 1866 idx, found = s.GetNull() 1867 if !found { 1868 idx = s.Size() 1869 s.nullIdx = int32(idx) 1870 } 1871 return 1872 } 1873 1874 // CopyValues will copy the values from the memo table out into the passed in slice 1875 // which must be of the appropriate type. 1876 func (s *Uint32MemoTable) CopyValues(out interface{}) { 1877 s.CopyValuesSubset(0, out) 1878 } 1879 1880 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 1881 // at the provided start index 1882 func (s *Uint32MemoTable) CopyValuesSubset(start int, out interface{}) { 1883 s.tbl.CopyValuesSubset(start, out.([]uint32)) 1884 } 1885 1886 func (s *Uint32MemoTable) WriteOut(out []byte) { 1887 s.tbl.CopyValues(arrow.Uint32Traits.CastFromBytes(out)) 1888 } 1889 1890 func (s *Uint32MemoTable) WriteOutSubset(start int, out []byte) { 1891 s.tbl.CopyValuesSubset(start, arrow.Uint32Traits.CastFromBytes(out)) 1892 } 1893 1894 func (s *Uint32MemoTable) WriteOutLE(out []byte) { 1895 s.tbl.WriteOut(out) 1896 } 1897 1898 func (s *Uint32MemoTable) WriteOutSubsetLE(start int, out []byte) { 1899 s.tbl.WriteOutSubset(start, out) 1900 } 1901 1902 // Get returns the index of the requested value in the hash table or KeyNotFound 1903 // along with a boolean indicating if it was found or not. 1904 func (s *Uint32MemoTable) Get(val interface{}) (int, bool) { 1905 1906 h := hashInt(uint64(val.(uint32)), 0) 1907 if e, ok := s.tbl.Lookup(h, func(v uint32) bool { return val.(uint32) == v }); ok { 1908 return int(e.payload.memoIdx), ok 1909 } 1910 return KeyNotFound, false 1911 } 1912 1913 // GetOrInsert will return the index of the specified value in the table, or insert the 1914 // value into the table and return the new index. found indicates whether or not it already 1915 // existed in the table (true) or was inserted by this call (false). 1916 func (s *Uint32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 1917 1918 h := hashInt(uint64(val.(uint32)), 0) 1919 e, ok := s.tbl.Lookup(h, func(v uint32) bool { 1920 return val.(uint32) == v 1921 }) 1922 1923 if ok { 1924 idx = int(e.payload.memoIdx) 1925 found = true 1926 } else { 1927 idx = s.Size() 1928 s.tbl.Insert(e, h, val.(uint32), int32(idx)) 1929 } 1930 return 1931 } 1932 1933 type payloadUint64 struct { 1934 val uint64 1935 memoIdx int32 1936 } 1937 1938 type entryUint64 struct { 1939 h uint64 1940 payload payloadUint64 1941 } 1942 1943 func (e entryUint64) Valid() bool { return e.h != sentinel } 1944 1945 // Uint64HashTable is a hashtable specifically for uint64 that 1946 // is utilized with the MemoTable to generalize interactions for easier 1947 // implementation of dictionaries without losing performance. 1948 type Uint64HashTable struct { 1949 cap uint64 1950 capMask uint64 1951 size uint64 1952 1953 entries []entryUint64 1954 } 1955 1956 // NewUint64HashTable returns a new hash table for uint64 values 1957 // initialized with the passed in capacity or 32 whichever is larger. 1958 func NewUint64HashTable(cap uint64) *Uint64HashTable { 1959 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1960 ret := &Uint64HashTable{cap: initCap, capMask: initCap - 1, size: 0} 1961 ret.entries = make([]entryUint64, initCap) 1962 return ret 1963 } 1964 1965 // Reset drops all of the values in this hash table and re-initializes it 1966 // with the specified initial capacity as if by calling New, but without having 1967 // to reallocate the object. 1968 func (h *Uint64HashTable) Reset(cap uint64) { 1969 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1970 h.capMask = h.cap - 1 1971 h.size = 0 1972 h.entries = make([]entryUint64, h.cap) 1973 } 1974 1975 // CopyValues is used for copying the values out of the hash table into the 1976 // passed in slice, in the order that they were first inserted 1977 func (h *Uint64HashTable) CopyValues(out []uint64) { 1978 h.CopyValuesSubset(0, out) 1979 } 1980 1981 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 1982 // with the value at start, in the order that they were inserted. 1983 func (h *Uint64HashTable) CopyValuesSubset(start int, out []uint64) { 1984 h.VisitEntries(func(e *entryUint64) { 1985 idx := e.payload.memoIdx - int32(start) 1986 if idx >= 0 { 1987 out[idx] = e.payload.val 1988 } 1989 }) 1990 } 1991 1992 func (h *Uint64HashTable) WriteOut(out []byte) { 1993 h.WriteOutSubset(0, out) 1994 } 1995 1996 func (h *Uint64HashTable) WriteOutSubset(start int, out []byte) { 1997 data := arrow.Uint64Traits.CastFromBytes(out) 1998 h.VisitEntries(func(e *entryUint64) { 1999 idx := e.payload.memoIdx - int32(start) 2000 if idx >= 0 { 2001 data[idx] = utils.ToLEUint64(e.payload.val) 2002 } 2003 }) 2004 } 2005 2006 func (h *Uint64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 2007 2008 func (Uint64HashTable) fixHash(v uint64) uint64 { 2009 if v == sentinel { 2010 return 42 2011 } 2012 return v 2013 } 2014 2015 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 2016 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 2017 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 2018 func (h *Uint64HashTable) Lookup(v uint64, cmp func(uint64) bool) (*entryUint64, bool) { 2019 idx, ok := h.lookup(v, h.capMask, cmp) 2020 return &h.entries[idx], ok 2021 } 2022 2023 func (h *Uint64HashTable) lookup(v uint64, szMask uint64, cmp func(uint64) bool) (uint64, bool) { 2024 const perturbShift uint8 = 5 2025 2026 var ( 2027 idx uint64 2028 perturb uint64 2029 e *entryUint64 2030 ) 2031 2032 v = h.fixHash(v) 2033 idx = v & szMask 2034 perturb = (v >> uint64(perturbShift)) + 1 2035 2036 for { 2037 e = &h.entries[idx] 2038 if e.h == v && cmp(e.payload.val) { 2039 return idx, true 2040 } 2041 2042 if e.h == sentinel { 2043 return idx, false 2044 } 2045 2046 // perturbation logic inspired from CPython's set/dict object 2047 // the goal is that all 64 bits of unmasked hash value eventually 2048 // participate int he probing sequence, to minimize clustering 2049 idx = (idx + perturb) & szMask 2050 perturb = (perturb >> uint64(perturbShift)) + 1 2051 } 2052 } 2053 2054 func (h *Uint64HashTable) upsize(newcap uint64) error { 2055 newMask := newcap - 1 2056 2057 oldEntries := h.entries 2058 h.entries = make([]entryUint64, newcap) 2059 for _, e := range oldEntries { 2060 if e.Valid() { 2061 idx, _ := h.lookup(e.h, newMask, func(uint64) bool { return false }) 2062 h.entries[idx] = e 2063 } 2064 } 2065 h.cap = newcap 2066 h.capMask = newMask 2067 return nil 2068 } 2069 2070 // Insert updates the given entry with the provided hash value, payload value and memo index. 2071 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 2072 func (h *Uint64HashTable) Insert(e *entryUint64, v uint64, val uint64, memoIdx int32) error { 2073 e.h = h.fixHash(v) 2074 e.payload.val = val 2075 e.payload.memoIdx = memoIdx 2076 h.size++ 2077 2078 if h.needUpsize() { 2079 h.upsize(h.cap * uint64(loadFactor) * 2) 2080 } 2081 return nil 2082 } 2083 2084 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 2085 // a valid entry being one which has had a value inserted into it. 2086 func (h *Uint64HashTable) VisitEntries(visit func(*entryUint64)) { 2087 for _, e := range h.entries { 2088 if e.Valid() { 2089 visit(&e) 2090 } 2091 } 2092 } 2093 2094 // Uint64MemoTable is a wrapper over the appropriate hashtable to provide an interface 2095 // conforming to the MemoTable interface defined in the encoding package for general interactions 2096 // regarding dictionaries. 2097 type Uint64MemoTable struct { 2098 tbl *Uint64HashTable 2099 nullIdx int32 2100 } 2101 2102 // NewUint64MemoTable returns a new memotable with num entries pre-allocated to reduce further 2103 // allocations when inserting. 2104 func NewUint64MemoTable(num int64) *Uint64MemoTable { 2105 return &Uint64MemoTable{tbl: NewUint64HashTable(uint64(num)), nullIdx: KeyNotFound} 2106 } 2107 2108 func (Uint64MemoTable) TypeTraits() TypeTraits { 2109 return arrow.Uint64Traits 2110 } 2111 2112 // Reset allows this table to be re-used by dumping all the data currently in the table. 2113 func (s *Uint64MemoTable) Reset() { 2114 s.tbl.Reset(32) 2115 s.nullIdx = KeyNotFound 2116 } 2117 2118 // Size returns the current number of inserted elements into the table including if a null 2119 // has been inserted. 2120 func (s *Uint64MemoTable) Size() int { 2121 sz := int(s.tbl.size) 2122 if _, ok := s.GetNull(); ok { 2123 sz++ 2124 } 2125 return sz 2126 } 2127 2128 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 2129 // that will be true if found and false if not. 2130 func (s *Uint64MemoTable) GetNull() (int, bool) { 2131 return int(s.nullIdx), s.nullIdx != KeyNotFound 2132 } 2133 2134 // GetOrInsertNull will return the index of the null entry or insert a null entry 2135 // if one currently doesn't exist. The found value will be true if there was already 2136 // a null in the table, and false if it inserted one. 2137 func (s *Uint64MemoTable) GetOrInsertNull() (idx int, found bool) { 2138 idx, found = s.GetNull() 2139 if !found { 2140 idx = s.Size() 2141 s.nullIdx = int32(idx) 2142 } 2143 return 2144 } 2145 2146 // CopyValues will copy the values from the memo table out into the passed in slice 2147 // which must be of the appropriate type. 2148 func (s *Uint64MemoTable) CopyValues(out interface{}) { 2149 s.CopyValuesSubset(0, out) 2150 } 2151 2152 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 2153 // at the provided start index 2154 func (s *Uint64MemoTable) CopyValuesSubset(start int, out interface{}) { 2155 s.tbl.CopyValuesSubset(start, out.([]uint64)) 2156 } 2157 2158 func (s *Uint64MemoTable) WriteOut(out []byte) { 2159 s.tbl.CopyValues(arrow.Uint64Traits.CastFromBytes(out)) 2160 } 2161 2162 func (s *Uint64MemoTable) WriteOutSubset(start int, out []byte) { 2163 s.tbl.CopyValuesSubset(start, arrow.Uint64Traits.CastFromBytes(out)) 2164 } 2165 2166 func (s *Uint64MemoTable) WriteOutLE(out []byte) { 2167 s.tbl.WriteOut(out) 2168 } 2169 2170 func (s *Uint64MemoTable) WriteOutSubsetLE(start int, out []byte) { 2171 s.tbl.WriteOutSubset(start, out) 2172 } 2173 2174 // Get returns the index of the requested value in the hash table or KeyNotFound 2175 // along with a boolean indicating if it was found or not. 2176 func (s *Uint64MemoTable) Get(val interface{}) (int, bool) { 2177 2178 h := hashInt(uint64(val.(uint64)), 0) 2179 if e, ok := s.tbl.Lookup(h, func(v uint64) bool { return val.(uint64) == v }); ok { 2180 return int(e.payload.memoIdx), ok 2181 } 2182 return KeyNotFound, false 2183 } 2184 2185 // GetOrInsert will return the index of the specified value in the table, or insert the 2186 // value into the table and return the new index. found indicates whether or not it already 2187 // existed in the table (true) or was inserted by this call (false). 2188 func (s *Uint64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 2189 2190 h := hashInt(uint64(val.(uint64)), 0) 2191 e, ok := s.tbl.Lookup(h, func(v uint64) bool { 2192 return val.(uint64) == v 2193 }) 2194 2195 if ok { 2196 idx = int(e.payload.memoIdx) 2197 found = true 2198 } else { 2199 idx = s.Size() 2200 s.tbl.Insert(e, h, val.(uint64), int32(idx)) 2201 } 2202 return 2203 } 2204 2205 type payloadFloat32 struct { 2206 val float32 2207 memoIdx int32 2208 } 2209 2210 type entryFloat32 struct { 2211 h uint64 2212 payload payloadFloat32 2213 } 2214 2215 func (e entryFloat32) Valid() bool { return e.h != sentinel } 2216 2217 // Float32HashTable is a hashtable specifically for float32 that 2218 // is utilized with the MemoTable to generalize interactions for easier 2219 // implementation of dictionaries without losing performance. 2220 type Float32HashTable struct { 2221 cap uint64 2222 capMask uint64 2223 size uint64 2224 2225 entries []entryFloat32 2226 } 2227 2228 // NewFloat32HashTable returns a new hash table for float32 values 2229 // initialized with the passed in capacity or 32 whichever is larger. 2230 func NewFloat32HashTable(cap uint64) *Float32HashTable { 2231 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 2232 ret := &Float32HashTable{cap: initCap, capMask: initCap - 1, size: 0} 2233 ret.entries = make([]entryFloat32, initCap) 2234 return ret 2235 } 2236 2237 // Reset drops all of the values in this hash table and re-initializes it 2238 // with the specified initial capacity as if by calling New, but without having 2239 // to reallocate the object. 2240 func (h *Float32HashTable) Reset(cap uint64) { 2241 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 2242 h.capMask = h.cap - 1 2243 h.size = 0 2244 h.entries = make([]entryFloat32, h.cap) 2245 } 2246 2247 // CopyValues is used for copying the values out of the hash table into the 2248 // passed in slice, in the order that they were first inserted 2249 func (h *Float32HashTable) CopyValues(out []float32) { 2250 h.CopyValuesSubset(0, out) 2251 } 2252 2253 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 2254 // with the value at start, in the order that they were inserted. 2255 func (h *Float32HashTable) CopyValuesSubset(start int, out []float32) { 2256 h.VisitEntries(func(e *entryFloat32) { 2257 idx := e.payload.memoIdx - int32(start) 2258 if idx >= 0 { 2259 out[idx] = e.payload.val 2260 } 2261 }) 2262 } 2263 2264 func (h *Float32HashTable) WriteOut(out []byte) { 2265 h.WriteOutSubset(0, out) 2266 } 2267 2268 func (h *Float32HashTable) WriteOutSubset(start int, out []byte) { 2269 data := arrow.Float32Traits.CastFromBytes(out) 2270 h.VisitEntries(func(e *entryFloat32) { 2271 idx := e.payload.memoIdx - int32(start) 2272 if idx >= 0 { 2273 data[idx] = utils.ToLEFloat32(e.payload.val) 2274 } 2275 }) 2276 } 2277 2278 func (h *Float32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 2279 2280 func (Float32HashTable) fixHash(v uint64) uint64 { 2281 if v == sentinel { 2282 return 42 2283 } 2284 return v 2285 } 2286 2287 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 2288 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 2289 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 2290 func (h *Float32HashTable) Lookup(v uint64, cmp func(float32) bool) (*entryFloat32, bool) { 2291 idx, ok := h.lookup(v, h.capMask, cmp) 2292 return &h.entries[idx], ok 2293 } 2294 2295 func (h *Float32HashTable) lookup(v uint64, szMask uint64, cmp func(float32) bool) (uint64, bool) { 2296 const perturbShift uint8 = 5 2297 2298 var ( 2299 idx uint64 2300 perturb uint64 2301 e *entryFloat32 2302 ) 2303 2304 v = h.fixHash(v) 2305 idx = v & szMask 2306 perturb = (v >> uint64(perturbShift)) + 1 2307 2308 for { 2309 e = &h.entries[idx] 2310 if e.h == v && cmp(e.payload.val) { 2311 return idx, true 2312 } 2313 2314 if e.h == sentinel { 2315 return idx, false 2316 } 2317 2318 // perturbation logic inspired from CPython's set/dict object 2319 // the goal is that all 64 bits of unmasked hash value eventually 2320 // participate int he probing sequence, to minimize clustering 2321 idx = (idx + perturb) & szMask 2322 perturb = (perturb >> uint64(perturbShift)) + 1 2323 } 2324 } 2325 2326 func (h *Float32HashTable) upsize(newcap uint64) error { 2327 newMask := newcap - 1 2328 2329 oldEntries := h.entries 2330 h.entries = make([]entryFloat32, newcap) 2331 for _, e := range oldEntries { 2332 if e.Valid() { 2333 idx, _ := h.lookup(e.h, newMask, func(float32) bool { return false }) 2334 h.entries[idx] = e 2335 } 2336 } 2337 h.cap = newcap 2338 h.capMask = newMask 2339 return nil 2340 } 2341 2342 // Insert updates the given entry with the provided hash value, payload value and memo index. 2343 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 2344 func (h *Float32HashTable) Insert(e *entryFloat32, v uint64, val float32, memoIdx int32) error { 2345 e.h = h.fixHash(v) 2346 e.payload.val = val 2347 e.payload.memoIdx = memoIdx 2348 h.size++ 2349 2350 if h.needUpsize() { 2351 h.upsize(h.cap * uint64(loadFactor) * 2) 2352 } 2353 return nil 2354 } 2355 2356 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 2357 // a valid entry being one which has had a value inserted into it. 2358 func (h *Float32HashTable) VisitEntries(visit func(*entryFloat32)) { 2359 for _, e := range h.entries { 2360 if e.Valid() { 2361 visit(&e) 2362 } 2363 } 2364 } 2365 2366 // Float32MemoTable is a wrapper over the appropriate hashtable to provide an interface 2367 // conforming to the MemoTable interface defined in the encoding package for general interactions 2368 // regarding dictionaries. 2369 type Float32MemoTable struct { 2370 tbl *Float32HashTable 2371 nullIdx int32 2372 } 2373 2374 // NewFloat32MemoTable returns a new memotable with num entries pre-allocated to reduce further 2375 // allocations when inserting. 2376 func NewFloat32MemoTable(num int64) *Float32MemoTable { 2377 return &Float32MemoTable{tbl: NewFloat32HashTable(uint64(num)), nullIdx: KeyNotFound} 2378 } 2379 2380 func (Float32MemoTable) TypeTraits() TypeTraits { 2381 return arrow.Float32Traits 2382 } 2383 2384 // Reset allows this table to be re-used by dumping all the data currently in the table. 2385 func (s *Float32MemoTable) Reset() { 2386 s.tbl.Reset(32) 2387 s.nullIdx = KeyNotFound 2388 } 2389 2390 // Size returns the current number of inserted elements into the table including if a null 2391 // has been inserted. 2392 func (s *Float32MemoTable) Size() int { 2393 sz := int(s.tbl.size) 2394 if _, ok := s.GetNull(); ok { 2395 sz++ 2396 } 2397 return sz 2398 } 2399 2400 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 2401 // that will be true if found and false if not. 2402 func (s *Float32MemoTable) GetNull() (int, bool) { 2403 return int(s.nullIdx), s.nullIdx != KeyNotFound 2404 } 2405 2406 // GetOrInsertNull will return the index of the null entry or insert a null entry 2407 // if one currently doesn't exist. The found value will be true if there was already 2408 // a null in the table, and false if it inserted one. 2409 func (s *Float32MemoTable) GetOrInsertNull() (idx int, found bool) { 2410 idx, found = s.GetNull() 2411 if !found { 2412 idx = s.Size() 2413 s.nullIdx = int32(idx) 2414 } 2415 return 2416 } 2417 2418 // CopyValues will copy the values from the memo table out into the passed in slice 2419 // which must be of the appropriate type. 2420 func (s *Float32MemoTable) CopyValues(out interface{}) { 2421 s.CopyValuesSubset(0, out) 2422 } 2423 2424 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 2425 // at the provided start index 2426 func (s *Float32MemoTable) CopyValuesSubset(start int, out interface{}) { 2427 s.tbl.CopyValuesSubset(start, out.([]float32)) 2428 } 2429 2430 func (s *Float32MemoTable) WriteOut(out []byte) { 2431 s.tbl.CopyValues(arrow.Float32Traits.CastFromBytes(out)) 2432 } 2433 2434 func (s *Float32MemoTable) WriteOutSubset(start int, out []byte) { 2435 s.tbl.CopyValuesSubset(start, arrow.Float32Traits.CastFromBytes(out)) 2436 } 2437 2438 func (s *Float32MemoTable) WriteOutLE(out []byte) { 2439 s.tbl.WriteOut(out) 2440 } 2441 2442 func (s *Float32MemoTable) WriteOutSubsetLE(start int, out []byte) { 2443 s.tbl.WriteOutSubset(start, out) 2444 } 2445 2446 // Get returns the index of the requested value in the hash table or KeyNotFound 2447 // along with a boolean indicating if it was found or not. 2448 func (s *Float32MemoTable) Get(val interface{}) (int, bool) { 2449 var cmp func(float32) bool 2450 2451 if math.IsNaN(float64(val.(float32))) { 2452 cmp = isNan32Cmp 2453 // use consistent internal bit pattern for NaN regardless of the pattern 2454 // that is passed to us. NaN is NaN is NaN 2455 val = float32(math.NaN()) 2456 } else { 2457 cmp = func(v float32) bool { return val.(float32) == v } 2458 } 2459 2460 h := hashFloat32(val.(float32), 0) 2461 if e, ok := s.tbl.Lookup(h, cmp); ok { 2462 return int(e.payload.memoIdx), ok 2463 } 2464 return KeyNotFound, false 2465 } 2466 2467 // GetOrInsert will return the index of the specified value in the table, or insert the 2468 // value into the table and return the new index. found indicates whether or not it already 2469 // existed in the table (true) or was inserted by this call (false). 2470 func (s *Float32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 2471 2472 var cmp func(float32) bool 2473 2474 if math.IsNaN(float64(val.(float32))) { 2475 cmp = isNan32Cmp 2476 // use consistent internal bit pattern for NaN regardless of the pattern 2477 // that is passed to us. NaN is NaN is NaN 2478 val = float32(math.NaN()) 2479 } else { 2480 cmp = func(v float32) bool { return val.(float32) == v } 2481 } 2482 2483 h := hashFloat32(val.(float32), 0) 2484 e, ok := s.tbl.Lookup(h, cmp) 2485 2486 if ok { 2487 idx = int(e.payload.memoIdx) 2488 found = true 2489 } else { 2490 idx = s.Size() 2491 s.tbl.Insert(e, h, val.(float32), int32(idx)) 2492 } 2493 return 2494 } 2495 2496 type payloadFloat64 struct { 2497 val float64 2498 memoIdx int32 2499 } 2500 2501 type entryFloat64 struct { 2502 h uint64 2503 payload payloadFloat64 2504 } 2505 2506 func (e entryFloat64) Valid() bool { return e.h != sentinel } 2507 2508 // Float64HashTable is a hashtable specifically for float64 that 2509 // is utilized with the MemoTable to generalize interactions for easier 2510 // implementation of dictionaries without losing performance. 2511 type Float64HashTable struct { 2512 cap uint64 2513 capMask uint64 2514 size uint64 2515 2516 entries []entryFloat64 2517 } 2518 2519 // NewFloat64HashTable returns a new hash table for float64 values 2520 // initialized with the passed in capacity or 32 whichever is larger. 2521 func NewFloat64HashTable(cap uint64) *Float64HashTable { 2522 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 2523 ret := &Float64HashTable{cap: initCap, capMask: initCap - 1, size: 0} 2524 ret.entries = make([]entryFloat64, initCap) 2525 return ret 2526 } 2527 2528 // Reset drops all of the values in this hash table and re-initializes it 2529 // with the specified initial capacity as if by calling New, but without having 2530 // to reallocate the object. 2531 func (h *Float64HashTable) Reset(cap uint64) { 2532 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 2533 h.capMask = h.cap - 1 2534 h.size = 0 2535 h.entries = make([]entryFloat64, h.cap) 2536 } 2537 2538 // CopyValues is used for copying the values out of the hash table into the 2539 // passed in slice, in the order that they were first inserted 2540 func (h *Float64HashTable) CopyValues(out []float64) { 2541 h.CopyValuesSubset(0, out) 2542 } 2543 2544 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 2545 // with the value at start, in the order that they were inserted. 2546 func (h *Float64HashTable) CopyValuesSubset(start int, out []float64) { 2547 h.VisitEntries(func(e *entryFloat64) { 2548 idx := e.payload.memoIdx - int32(start) 2549 if idx >= 0 { 2550 out[idx] = e.payload.val 2551 } 2552 }) 2553 } 2554 2555 func (h *Float64HashTable) WriteOut(out []byte) { 2556 h.WriteOutSubset(0, out) 2557 } 2558 2559 func (h *Float64HashTable) WriteOutSubset(start int, out []byte) { 2560 data := arrow.Float64Traits.CastFromBytes(out) 2561 h.VisitEntries(func(e *entryFloat64) { 2562 idx := e.payload.memoIdx - int32(start) 2563 if idx >= 0 { 2564 data[idx] = utils.ToLEFloat64(e.payload.val) 2565 } 2566 }) 2567 } 2568 2569 func (h *Float64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 2570 2571 func (Float64HashTable) fixHash(v uint64) uint64 { 2572 if v == sentinel { 2573 return 42 2574 } 2575 return v 2576 } 2577 2578 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 2579 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 2580 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 2581 func (h *Float64HashTable) Lookup(v uint64, cmp func(float64) bool) (*entryFloat64, bool) { 2582 idx, ok := h.lookup(v, h.capMask, cmp) 2583 return &h.entries[idx], ok 2584 } 2585 2586 func (h *Float64HashTable) lookup(v uint64, szMask uint64, cmp func(float64) bool) (uint64, bool) { 2587 const perturbShift uint8 = 5 2588 2589 var ( 2590 idx uint64 2591 perturb uint64 2592 e *entryFloat64 2593 ) 2594 2595 v = h.fixHash(v) 2596 idx = v & szMask 2597 perturb = (v >> uint64(perturbShift)) + 1 2598 2599 for { 2600 e = &h.entries[idx] 2601 if e.h == v && cmp(e.payload.val) { 2602 return idx, true 2603 } 2604 2605 if e.h == sentinel { 2606 return idx, false 2607 } 2608 2609 // perturbation logic inspired from CPython's set/dict object 2610 // the goal is that all 64 bits of unmasked hash value eventually 2611 // participate int he probing sequence, to minimize clustering 2612 idx = (idx + perturb) & szMask 2613 perturb = (perturb >> uint64(perturbShift)) + 1 2614 } 2615 } 2616 2617 func (h *Float64HashTable) upsize(newcap uint64) error { 2618 newMask := newcap - 1 2619 2620 oldEntries := h.entries 2621 h.entries = make([]entryFloat64, newcap) 2622 for _, e := range oldEntries { 2623 if e.Valid() { 2624 idx, _ := h.lookup(e.h, newMask, func(float64) bool { return false }) 2625 h.entries[idx] = e 2626 } 2627 } 2628 h.cap = newcap 2629 h.capMask = newMask 2630 return nil 2631 } 2632 2633 // Insert updates the given entry with the provided hash value, payload value and memo index. 2634 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 2635 func (h *Float64HashTable) Insert(e *entryFloat64, v uint64, val float64, memoIdx int32) error { 2636 e.h = h.fixHash(v) 2637 e.payload.val = val 2638 e.payload.memoIdx = memoIdx 2639 h.size++ 2640 2641 if h.needUpsize() { 2642 h.upsize(h.cap * uint64(loadFactor) * 2) 2643 } 2644 return nil 2645 } 2646 2647 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 2648 // a valid entry being one which has had a value inserted into it. 2649 func (h *Float64HashTable) VisitEntries(visit func(*entryFloat64)) { 2650 for _, e := range h.entries { 2651 if e.Valid() { 2652 visit(&e) 2653 } 2654 } 2655 } 2656 2657 // Float64MemoTable is a wrapper over the appropriate hashtable to provide an interface 2658 // conforming to the MemoTable interface defined in the encoding package for general interactions 2659 // regarding dictionaries. 2660 type Float64MemoTable struct { 2661 tbl *Float64HashTable 2662 nullIdx int32 2663 } 2664 2665 // NewFloat64MemoTable returns a new memotable with num entries pre-allocated to reduce further 2666 // allocations when inserting. 2667 func NewFloat64MemoTable(num int64) *Float64MemoTable { 2668 return &Float64MemoTable{tbl: NewFloat64HashTable(uint64(num)), nullIdx: KeyNotFound} 2669 } 2670 2671 func (Float64MemoTable) TypeTraits() TypeTraits { 2672 return arrow.Float64Traits 2673 } 2674 2675 // Reset allows this table to be re-used by dumping all the data currently in the table. 2676 func (s *Float64MemoTable) Reset() { 2677 s.tbl.Reset(32) 2678 s.nullIdx = KeyNotFound 2679 } 2680 2681 // Size returns the current number of inserted elements into the table including if a null 2682 // has been inserted. 2683 func (s *Float64MemoTable) Size() int { 2684 sz := int(s.tbl.size) 2685 if _, ok := s.GetNull(); ok { 2686 sz++ 2687 } 2688 return sz 2689 } 2690 2691 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 2692 // that will be true if found and false if not. 2693 func (s *Float64MemoTable) GetNull() (int, bool) { 2694 return int(s.nullIdx), s.nullIdx != KeyNotFound 2695 } 2696 2697 // GetOrInsertNull will return the index of the null entry or insert a null entry 2698 // if one currently doesn't exist. The found value will be true if there was already 2699 // a null in the table, and false if it inserted one. 2700 func (s *Float64MemoTable) GetOrInsertNull() (idx int, found bool) { 2701 idx, found = s.GetNull() 2702 if !found { 2703 idx = s.Size() 2704 s.nullIdx = int32(idx) 2705 } 2706 return 2707 } 2708 2709 // CopyValues will copy the values from the memo table out into the passed in slice 2710 // which must be of the appropriate type. 2711 func (s *Float64MemoTable) CopyValues(out interface{}) { 2712 s.CopyValuesSubset(0, out) 2713 } 2714 2715 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 2716 // at the provided start index 2717 func (s *Float64MemoTable) CopyValuesSubset(start int, out interface{}) { 2718 s.tbl.CopyValuesSubset(start, out.([]float64)) 2719 } 2720 2721 func (s *Float64MemoTable) WriteOut(out []byte) { 2722 s.tbl.CopyValues(arrow.Float64Traits.CastFromBytes(out)) 2723 } 2724 2725 func (s *Float64MemoTable) WriteOutSubset(start int, out []byte) { 2726 s.tbl.CopyValuesSubset(start, arrow.Float64Traits.CastFromBytes(out)) 2727 } 2728 2729 func (s *Float64MemoTable) WriteOutLE(out []byte) { 2730 s.tbl.WriteOut(out) 2731 } 2732 2733 func (s *Float64MemoTable) WriteOutSubsetLE(start int, out []byte) { 2734 s.tbl.WriteOutSubset(start, out) 2735 } 2736 2737 // Get returns the index of the requested value in the hash table or KeyNotFound 2738 // along with a boolean indicating if it was found or not. 2739 func (s *Float64MemoTable) Get(val interface{}) (int, bool) { 2740 var cmp func(float64) bool 2741 if math.IsNaN(val.(float64)) { 2742 cmp = math.IsNaN 2743 // use consistent internal bit pattern for NaN regardless of the pattern 2744 // that is passed to us. NaN is NaN is NaN 2745 val = math.NaN() 2746 } else { 2747 cmp = func(v float64) bool { return val.(float64) == v } 2748 } 2749 2750 h := hashFloat64(val.(float64), 0) 2751 if e, ok := s.tbl.Lookup(h, cmp); ok { 2752 return int(e.payload.memoIdx), ok 2753 } 2754 return KeyNotFound, false 2755 } 2756 2757 // GetOrInsert will return the index of the specified value in the table, or insert the 2758 // value into the table and return the new index. found indicates whether or not it already 2759 // existed in the table (true) or was inserted by this call (false). 2760 func (s *Float64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 2761 2762 var cmp func(float64) bool 2763 if math.IsNaN(val.(float64)) { 2764 cmp = math.IsNaN 2765 // use consistent internal bit pattern for NaN regardless of the pattern 2766 // that is passed to us. NaN is NaN is NaN 2767 val = math.NaN() 2768 } else { 2769 cmp = func(v float64) bool { return val.(float64) == v } 2770 } 2771 2772 h := hashFloat64(val.(float64), 0) 2773 e, ok := s.tbl.Lookup(h, cmp) 2774 2775 if ok { 2776 idx = int(e.payload.memoIdx) 2777 found = true 2778 } else { 2779 idx = s.Size() 2780 s.tbl.Insert(e, h, val.(float64), int32(idx)) 2781 } 2782 return 2783 }