github.com/apache/arrow/go/v14@v14.0.1/internal/hashing/xxh3_memo_table.gen.go (about) 1 // Code generated by xxh3_memo_table.gen.go.tmpl. DO NOT EDIT. 2 3 // Licensed to the Apache Software Foundation (ASF) under one 4 // or more contributor license agreements. See the NOTICE file 5 // distributed with this work for additional information 6 // regarding copyright ownership. The ASF licenses this file 7 // to you under the Apache License, Version 2.0 (the 8 // "License"); you may not use this file except in compliance 9 // with the License. You may obtain a copy of the License at 10 // 11 // http://www.apache.org/licenses/LICENSE-2.0 12 // 13 // Unless required by applicable law or agreed to in writing, software 14 // distributed under the License is distributed on an "AS IS" BASIS, 15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 // See the License for the specific language governing permissions and 17 // limitations under the License. 18 19 package hashing 20 21 import ( 22 "math" 23 24 "github.com/apache/arrow/go/v14/arrow" 25 "github.com/apache/arrow/go/v14/arrow/bitutil" 26 "github.com/apache/arrow/go/v14/internal/utils" 27 ) 28 29 type payloadInt8 struct { 30 val int8 31 memoIdx int32 32 } 33 34 type entryInt8 struct { 35 h uint64 36 payload payloadInt8 37 } 38 39 func (e entryInt8) Valid() bool { return e.h != sentinel } 40 41 // Int8HashTable is a hashtable specifically for int8 that 42 // is utilized with the MemoTable to generalize interactions for easier 43 // implementation of dictionaries without losing performance. 44 type Int8HashTable struct { 45 cap uint64 46 capMask uint64 47 size uint64 48 49 entries []entryInt8 50 } 51 52 // NewInt8HashTable returns a new hash table for int8 values 53 // initialized with the passed in capacity or 32 whichever is larger. 54 func NewInt8HashTable(cap uint64) *Int8HashTable { 55 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 56 ret := &Int8HashTable{cap: initCap, capMask: initCap - 1, size: 0} 57 ret.entries = make([]entryInt8, initCap) 58 return ret 59 } 60 61 // Reset drops all of the values in this hash table and re-initializes it 62 // with the specified initial capacity as if by calling New, but without having 63 // to reallocate the object. 64 func (h *Int8HashTable) Reset(cap uint64) { 65 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 66 h.capMask = h.cap - 1 67 h.size = 0 68 h.entries = make([]entryInt8, h.cap) 69 } 70 71 // CopyValues is used for copying the values out of the hash table into the 72 // passed in slice, in the order that they were first inserted 73 func (h *Int8HashTable) CopyValues(out []int8) { 74 h.CopyValuesSubset(0, out) 75 } 76 77 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 78 // with the value at start, in the order that they were inserted. 79 func (h *Int8HashTable) CopyValuesSubset(start int, out []int8) { 80 h.VisitEntries(func(e *entryInt8) { 81 idx := e.payload.memoIdx - int32(start) 82 if idx >= 0 { 83 out[idx] = e.payload.val 84 } 85 }) 86 } 87 88 func (h *Int8HashTable) WriteOut(out []byte) { 89 h.WriteOutSubset(0, out) 90 } 91 92 func (h *Int8HashTable) WriteOutSubset(start int, out []byte) { 93 data := arrow.Int8Traits.CastFromBytes(out) 94 h.VisitEntries(func(e *entryInt8) { 95 idx := e.payload.memoIdx - int32(start) 96 if idx >= 0 { 97 data[idx] = e.payload.val 98 } 99 }) 100 } 101 102 func (h *Int8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 103 104 func (Int8HashTable) fixHash(v uint64) uint64 { 105 if v == sentinel { 106 return 42 107 } 108 return v 109 } 110 111 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 112 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 113 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 114 func (h *Int8HashTable) Lookup(v uint64, cmp func(int8) bool) (*entryInt8, bool) { 115 idx, ok := h.lookup(v, h.capMask, cmp) 116 return &h.entries[idx], ok 117 } 118 119 func (h *Int8HashTable) lookup(v uint64, szMask uint64, cmp func(int8) bool) (uint64, bool) { 120 const perturbShift uint8 = 5 121 122 var ( 123 idx uint64 124 perturb uint64 125 e *entryInt8 126 ) 127 128 v = h.fixHash(v) 129 idx = v & szMask 130 perturb = (v >> uint64(perturbShift)) + 1 131 132 for { 133 e = &h.entries[idx] 134 if e.h == v && cmp(e.payload.val) { 135 return idx, true 136 } 137 138 if e.h == sentinel { 139 return idx, false 140 } 141 142 // perturbation logic inspired from CPython's set/dict object 143 // the goal is that all 64 bits of unmasked hash value eventually 144 // participate int he probing sequence, to minimize clustering 145 idx = (idx + perturb) & szMask 146 perturb = (perturb >> uint64(perturbShift)) + 1 147 } 148 } 149 150 func (h *Int8HashTable) upsize(newcap uint64) error { 151 newMask := newcap - 1 152 153 oldEntries := h.entries 154 h.entries = make([]entryInt8, newcap) 155 for _, e := range oldEntries { 156 if e.Valid() { 157 idx, _ := h.lookup(e.h, newMask, func(int8) bool { return false }) 158 h.entries[idx] = e 159 } 160 } 161 h.cap = newcap 162 h.capMask = newMask 163 return nil 164 } 165 166 // Insert updates the given entry with the provided hash value, payload value and memo index. 167 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 168 func (h *Int8HashTable) Insert(e *entryInt8, v uint64, val int8, memoIdx int32) error { 169 e.h = h.fixHash(v) 170 e.payload.val = val 171 e.payload.memoIdx = memoIdx 172 h.size++ 173 174 if h.needUpsize() { 175 h.upsize(h.cap * uint64(loadFactor) * 2) 176 } 177 return nil 178 } 179 180 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 181 // a valid entry being one which has had a value inserted into it. 182 func (h *Int8HashTable) VisitEntries(visit func(*entryInt8)) { 183 for _, e := range h.entries { 184 if e.Valid() { 185 visit(&e) 186 } 187 } 188 } 189 190 // Int8MemoTable is a wrapper over the appropriate hashtable to provide an interface 191 // conforming to the MemoTable interface defined in the encoding package for general interactions 192 // regarding dictionaries. 193 type Int8MemoTable struct { 194 tbl *Int8HashTable 195 nullIdx int32 196 } 197 198 // NewInt8MemoTable returns a new memotable with num entries pre-allocated to reduce further 199 // allocations when inserting. 200 func NewInt8MemoTable(num int64) *Int8MemoTable { 201 return &Int8MemoTable{tbl: NewInt8HashTable(uint64(num)), nullIdx: KeyNotFound} 202 } 203 204 func (Int8MemoTable) TypeTraits() TypeTraits { 205 return arrow.Int8Traits 206 } 207 208 // Reset allows this table to be re-used by dumping all the data currently in the table. 209 func (s *Int8MemoTable) Reset() { 210 s.tbl.Reset(32) 211 s.nullIdx = KeyNotFound 212 } 213 214 // Size returns the current number of inserted elements into the table including if a null 215 // has been inserted. 216 func (s *Int8MemoTable) Size() int { 217 sz := int(s.tbl.size) 218 if _, ok := s.GetNull(); ok { 219 sz++ 220 } 221 return sz 222 } 223 224 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 225 // that will be true if found and false if not. 226 func (s *Int8MemoTable) GetNull() (int, bool) { 227 return int(s.nullIdx), s.nullIdx != KeyNotFound 228 } 229 230 // GetOrInsertNull will return the index of the null entry or insert a null entry 231 // if one currently doesn't exist. The found value will be true if there was already 232 // a null in the table, and false if it inserted one. 233 func (s *Int8MemoTable) GetOrInsertNull() (idx int, found bool) { 234 idx, found = s.GetNull() 235 if !found { 236 idx = s.Size() 237 s.nullIdx = int32(idx) 238 } 239 return 240 } 241 242 // CopyValues will copy the values from the memo table out into the passed in slice 243 // which must be of the appropriate type. 244 func (s *Int8MemoTable) CopyValues(out interface{}) { 245 s.CopyValuesSubset(0, out) 246 } 247 248 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 249 // at the provided start index 250 func (s *Int8MemoTable) CopyValuesSubset(start int, out interface{}) { 251 s.tbl.CopyValuesSubset(start, out.([]int8)) 252 } 253 254 func (s *Int8MemoTable) WriteOut(out []byte) { 255 s.tbl.CopyValues(arrow.Int8Traits.CastFromBytes(out)) 256 } 257 258 func (s *Int8MemoTable) WriteOutSubset(start int, out []byte) { 259 s.tbl.CopyValuesSubset(start, arrow.Int8Traits.CastFromBytes(out)) 260 } 261 262 func (s *Int8MemoTable) WriteOutLE(out []byte) { 263 s.tbl.WriteOut(out) 264 } 265 266 func (s *Int8MemoTable) WriteOutSubsetLE(start int, out []byte) { 267 s.tbl.WriteOutSubset(start, out) 268 } 269 270 // Get returns the index of the requested value in the hash table or KeyNotFound 271 // along with a boolean indicating if it was found or not. 272 func (s *Int8MemoTable) Get(val interface{}) (int, bool) { 273 274 h := hashInt(uint64(val.(int8)), 0) 275 if e, ok := s.tbl.Lookup(h, func(v int8) bool { return val.(int8) == v }); ok { 276 return int(e.payload.memoIdx), ok 277 } 278 return KeyNotFound, false 279 } 280 281 // GetOrInsert will return the index of the specified value in the table, or insert the 282 // value into the table and return the new index. found indicates whether or not it already 283 // existed in the table (true) or was inserted by this call (false). 284 func (s *Int8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 285 286 h := hashInt(uint64(val.(int8)), 0) 287 e, ok := s.tbl.Lookup(h, func(v int8) bool { 288 return val.(int8) == v 289 }) 290 291 if ok { 292 idx = int(e.payload.memoIdx) 293 found = true 294 } else { 295 idx = s.Size() 296 s.tbl.Insert(e, h, val.(int8), int32(idx)) 297 } 298 return 299 } 300 301 // GetOrInsertBytes is unimplemented 302 func (s *Int8MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { 303 panic("unimplemented") 304 } 305 306 type payloadUint8 struct { 307 val uint8 308 memoIdx int32 309 } 310 311 type entryUint8 struct { 312 h uint64 313 payload payloadUint8 314 } 315 316 func (e entryUint8) Valid() bool { return e.h != sentinel } 317 318 // Uint8HashTable is a hashtable specifically for uint8 that 319 // is utilized with the MemoTable to generalize interactions for easier 320 // implementation of dictionaries without losing performance. 321 type Uint8HashTable struct { 322 cap uint64 323 capMask uint64 324 size uint64 325 326 entries []entryUint8 327 } 328 329 // NewUint8HashTable returns a new hash table for uint8 values 330 // initialized with the passed in capacity or 32 whichever is larger. 331 func NewUint8HashTable(cap uint64) *Uint8HashTable { 332 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 333 ret := &Uint8HashTable{cap: initCap, capMask: initCap - 1, size: 0} 334 ret.entries = make([]entryUint8, initCap) 335 return ret 336 } 337 338 // Reset drops all of the values in this hash table and re-initializes it 339 // with the specified initial capacity as if by calling New, but without having 340 // to reallocate the object. 341 func (h *Uint8HashTable) Reset(cap uint64) { 342 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 343 h.capMask = h.cap - 1 344 h.size = 0 345 h.entries = make([]entryUint8, h.cap) 346 } 347 348 // CopyValues is used for copying the values out of the hash table into the 349 // passed in slice, in the order that they were first inserted 350 func (h *Uint8HashTable) CopyValues(out []uint8) { 351 h.CopyValuesSubset(0, out) 352 } 353 354 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 355 // with the value at start, in the order that they were inserted. 356 func (h *Uint8HashTable) CopyValuesSubset(start int, out []uint8) { 357 h.VisitEntries(func(e *entryUint8) { 358 idx := e.payload.memoIdx - int32(start) 359 if idx >= 0 { 360 out[idx] = e.payload.val 361 } 362 }) 363 } 364 365 func (h *Uint8HashTable) WriteOut(out []byte) { 366 h.WriteOutSubset(0, out) 367 } 368 369 func (h *Uint8HashTable) WriteOutSubset(start int, out []byte) { 370 data := arrow.Uint8Traits.CastFromBytes(out) 371 h.VisitEntries(func(e *entryUint8) { 372 idx := e.payload.memoIdx - int32(start) 373 if idx >= 0 { 374 data[idx] = e.payload.val 375 } 376 }) 377 } 378 379 func (h *Uint8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 380 381 func (Uint8HashTable) fixHash(v uint64) uint64 { 382 if v == sentinel { 383 return 42 384 } 385 return v 386 } 387 388 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 389 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 390 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 391 func (h *Uint8HashTable) Lookup(v uint64, cmp func(uint8) bool) (*entryUint8, bool) { 392 idx, ok := h.lookup(v, h.capMask, cmp) 393 return &h.entries[idx], ok 394 } 395 396 func (h *Uint8HashTable) lookup(v uint64, szMask uint64, cmp func(uint8) bool) (uint64, bool) { 397 const perturbShift uint8 = 5 398 399 var ( 400 idx uint64 401 perturb uint64 402 e *entryUint8 403 ) 404 405 v = h.fixHash(v) 406 idx = v & szMask 407 perturb = (v >> uint64(perturbShift)) + 1 408 409 for { 410 e = &h.entries[idx] 411 if e.h == v && cmp(e.payload.val) { 412 return idx, true 413 } 414 415 if e.h == sentinel { 416 return idx, false 417 } 418 419 // perturbation logic inspired from CPython's set/dict object 420 // the goal is that all 64 bits of unmasked hash value eventually 421 // participate int he probing sequence, to minimize clustering 422 idx = (idx + perturb) & szMask 423 perturb = (perturb >> uint64(perturbShift)) + 1 424 } 425 } 426 427 func (h *Uint8HashTable) upsize(newcap uint64) error { 428 newMask := newcap - 1 429 430 oldEntries := h.entries 431 h.entries = make([]entryUint8, newcap) 432 for _, e := range oldEntries { 433 if e.Valid() { 434 idx, _ := h.lookup(e.h, newMask, func(uint8) bool { return false }) 435 h.entries[idx] = e 436 } 437 } 438 h.cap = newcap 439 h.capMask = newMask 440 return nil 441 } 442 443 // Insert updates the given entry with the provided hash value, payload value and memo index. 444 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 445 func (h *Uint8HashTable) Insert(e *entryUint8, v uint64, val uint8, memoIdx int32) error { 446 e.h = h.fixHash(v) 447 e.payload.val = val 448 e.payload.memoIdx = memoIdx 449 h.size++ 450 451 if h.needUpsize() { 452 h.upsize(h.cap * uint64(loadFactor) * 2) 453 } 454 return nil 455 } 456 457 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 458 // a valid entry being one which has had a value inserted into it. 459 func (h *Uint8HashTable) VisitEntries(visit func(*entryUint8)) { 460 for _, e := range h.entries { 461 if e.Valid() { 462 visit(&e) 463 } 464 } 465 } 466 467 // Uint8MemoTable is a wrapper over the appropriate hashtable to provide an interface 468 // conforming to the MemoTable interface defined in the encoding package for general interactions 469 // regarding dictionaries. 470 type Uint8MemoTable struct { 471 tbl *Uint8HashTable 472 nullIdx int32 473 } 474 475 // NewUint8MemoTable returns a new memotable with num entries pre-allocated to reduce further 476 // allocations when inserting. 477 func NewUint8MemoTable(num int64) *Uint8MemoTable { 478 return &Uint8MemoTable{tbl: NewUint8HashTable(uint64(num)), nullIdx: KeyNotFound} 479 } 480 481 func (Uint8MemoTable) TypeTraits() TypeTraits { 482 return arrow.Uint8Traits 483 } 484 485 // Reset allows this table to be re-used by dumping all the data currently in the table. 486 func (s *Uint8MemoTable) Reset() { 487 s.tbl.Reset(32) 488 s.nullIdx = KeyNotFound 489 } 490 491 // Size returns the current number of inserted elements into the table including if a null 492 // has been inserted. 493 func (s *Uint8MemoTable) Size() int { 494 sz := int(s.tbl.size) 495 if _, ok := s.GetNull(); ok { 496 sz++ 497 } 498 return sz 499 } 500 501 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 502 // that will be true if found and false if not. 503 func (s *Uint8MemoTable) GetNull() (int, bool) { 504 return int(s.nullIdx), s.nullIdx != KeyNotFound 505 } 506 507 // GetOrInsertNull will return the index of the null entry or insert a null entry 508 // if one currently doesn't exist. The found value will be true if there was already 509 // a null in the table, and false if it inserted one. 510 func (s *Uint8MemoTable) GetOrInsertNull() (idx int, found bool) { 511 idx, found = s.GetNull() 512 if !found { 513 idx = s.Size() 514 s.nullIdx = int32(idx) 515 } 516 return 517 } 518 519 // CopyValues will copy the values from the memo table out into the passed in slice 520 // which must be of the appropriate type. 521 func (s *Uint8MemoTable) CopyValues(out interface{}) { 522 s.CopyValuesSubset(0, out) 523 } 524 525 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 526 // at the provided start index 527 func (s *Uint8MemoTable) CopyValuesSubset(start int, out interface{}) { 528 s.tbl.CopyValuesSubset(start, out.([]uint8)) 529 } 530 531 func (s *Uint8MemoTable) WriteOut(out []byte) { 532 s.tbl.CopyValues(arrow.Uint8Traits.CastFromBytes(out)) 533 } 534 535 func (s *Uint8MemoTable) WriteOutSubset(start int, out []byte) { 536 s.tbl.CopyValuesSubset(start, arrow.Uint8Traits.CastFromBytes(out)) 537 } 538 539 func (s *Uint8MemoTable) WriteOutLE(out []byte) { 540 s.tbl.WriteOut(out) 541 } 542 543 func (s *Uint8MemoTable) WriteOutSubsetLE(start int, out []byte) { 544 s.tbl.WriteOutSubset(start, out) 545 } 546 547 // Get returns the index of the requested value in the hash table or KeyNotFound 548 // along with a boolean indicating if it was found or not. 549 func (s *Uint8MemoTable) Get(val interface{}) (int, bool) { 550 551 h := hashInt(uint64(val.(uint8)), 0) 552 if e, ok := s.tbl.Lookup(h, func(v uint8) bool { return val.(uint8) == v }); ok { 553 return int(e.payload.memoIdx), ok 554 } 555 return KeyNotFound, false 556 } 557 558 // GetOrInsert will return the index of the specified value in the table, or insert the 559 // value into the table and return the new index. found indicates whether or not it already 560 // existed in the table (true) or was inserted by this call (false). 561 func (s *Uint8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 562 563 h := hashInt(uint64(val.(uint8)), 0) 564 e, ok := s.tbl.Lookup(h, func(v uint8) bool { 565 return val.(uint8) == v 566 }) 567 568 if ok { 569 idx = int(e.payload.memoIdx) 570 found = true 571 } else { 572 idx = s.Size() 573 s.tbl.Insert(e, h, val.(uint8), int32(idx)) 574 } 575 return 576 } 577 578 // GetOrInsertBytes is unimplemented 579 func (s *Uint8MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { 580 panic("unimplemented") 581 } 582 583 type payloadInt16 struct { 584 val int16 585 memoIdx int32 586 } 587 588 type entryInt16 struct { 589 h uint64 590 payload payloadInt16 591 } 592 593 func (e entryInt16) Valid() bool { return e.h != sentinel } 594 595 // Int16HashTable is a hashtable specifically for int16 that 596 // is utilized with the MemoTable to generalize interactions for easier 597 // implementation of dictionaries without losing performance. 598 type Int16HashTable struct { 599 cap uint64 600 capMask uint64 601 size uint64 602 603 entries []entryInt16 604 } 605 606 // NewInt16HashTable returns a new hash table for int16 values 607 // initialized with the passed in capacity or 32 whichever is larger. 608 func NewInt16HashTable(cap uint64) *Int16HashTable { 609 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 610 ret := &Int16HashTable{cap: initCap, capMask: initCap - 1, size: 0} 611 ret.entries = make([]entryInt16, initCap) 612 return ret 613 } 614 615 // Reset drops all of the values in this hash table and re-initializes it 616 // with the specified initial capacity as if by calling New, but without having 617 // to reallocate the object. 618 func (h *Int16HashTable) Reset(cap uint64) { 619 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 620 h.capMask = h.cap - 1 621 h.size = 0 622 h.entries = make([]entryInt16, h.cap) 623 } 624 625 // CopyValues is used for copying the values out of the hash table into the 626 // passed in slice, in the order that they were first inserted 627 func (h *Int16HashTable) CopyValues(out []int16) { 628 h.CopyValuesSubset(0, out) 629 } 630 631 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 632 // with the value at start, in the order that they were inserted. 633 func (h *Int16HashTable) CopyValuesSubset(start int, out []int16) { 634 h.VisitEntries(func(e *entryInt16) { 635 idx := e.payload.memoIdx - int32(start) 636 if idx >= 0 { 637 out[idx] = e.payload.val 638 } 639 }) 640 } 641 642 func (h *Int16HashTable) WriteOut(out []byte) { 643 h.WriteOutSubset(0, out) 644 } 645 646 func (h *Int16HashTable) WriteOutSubset(start int, out []byte) { 647 data := arrow.Int16Traits.CastFromBytes(out) 648 h.VisitEntries(func(e *entryInt16) { 649 idx := e.payload.memoIdx - int32(start) 650 if idx >= 0 { 651 data[idx] = utils.ToLEInt16(e.payload.val) 652 } 653 }) 654 } 655 656 func (h *Int16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 657 658 func (Int16HashTable) fixHash(v uint64) uint64 { 659 if v == sentinel { 660 return 42 661 } 662 return v 663 } 664 665 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 666 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 667 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 668 func (h *Int16HashTable) Lookup(v uint64, cmp func(int16) bool) (*entryInt16, bool) { 669 idx, ok := h.lookup(v, h.capMask, cmp) 670 return &h.entries[idx], ok 671 } 672 673 func (h *Int16HashTable) lookup(v uint64, szMask uint64, cmp func(int16) bool) (uint64, bool) { 674 const perturbShift uint8 = 5 675 676 var ( 677 idx uint64 678 perturb uint64 679 e *entryInt16 680 ) 681 682 v = h.fixHash(v) 683 idx = v & szMask 684 perturb = (v >> uint64(perturbShift)) + 1 685 686 for { 687 e = &h.entries[idx] 688 if e.h == v && cmp(e.payload.val) { 689 return idx, true 690 } 691 692 if e.h == sentinel { 693 return idx, false 694 } 695 696 // perturbation logic inspired from CPython's set/dict object 697 // the goal is that all 64 bits of unmasked hash value eventually 698 // participate int he probing sequence, to minimize clustering 699 idx = (idx + perturb) & szMask 700 perturb = (perturb >> uint64(perturbShift)) + 1 701 } 702 } 703 704 func (h *Int16HashTable) upsize(newcap uint64) error { 705 newMask := newcap - 1 706 707 oldEntries := h.entries 708 h.entries = make([]entryInt16, newcap) 709 for _, e := range oldEntries { 710 if e.Valid() { 711 idx, _ := h.lookup(e.h, newMask, func(int16) bool { return false }) 712 h.entries[idx] = e 713 } 714 } 715 h.cap = newcap 716 h.capMask = newMask 717 return nil 718 } 719 720 // Insert updates the given entry with the provided hash value, payload value and memo index. 721 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 722 func (h *Int16HashTable) Insert(e *entryInt16, v uint64, val int16, memoIdx int32) error { 723 e.h = h.fixHash(v) 724 e.payload.val = val 725 e.payload.memoIdx = memoIdx 726 h.size++ 727 728 if h.needUpsize() { 729 h.upsize(h.cap * uint64(loadFactor) * 2) 730 } 731 return nil 732 } 733 734 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 735 // a valid entry being one which has had a value inserted into it. 736 func (h *Int16HashTable) VisitEntries(visit func(*entryInt16)) { 737 for _, e := range h.entries { 738 if e.Valid() { 739 visit(&e) 740 } 741 } 742 } 743 744 // Int16MemoTable is a wrapper over the appropriate hashtable to provide an interface 745 // conforming to the MemoTable interface defined in the encoding package for general interactions 746 // regarding dictionaries. 747 type Int16MemoTable struct { 748 tbl *Int16HashTable 749 nullIdx int32 750 } 751 752 // NewInt16MemoTable returns a new memotable with num entries pre-allocated to reduce further 753 // allocations when inserting. 754 func NewInt16MemoTable(num int64) *Int16MemoTable { 755 return &Int16MemoTable{tbl: NewInt16HashTable(uint64(num)), nullIdx: KeyNotFound} 756 } 757 758 func (Int16MemoTable) TypeTraits() TypeTraits { 759 return arrow.Int16Traits 760 } 761 762 // Reset allows this table to be re-used by dumping all the data currently in the table. 763 func (s *Int16MemoTable) Reset() { 764 s.tbl.Reset(32) 765 s.nullIdx = KeyNotFound 766 } 767 768 // Size returns the current number of inserted elements into the table including if a null 769 // has been inserted. 770 func (s *Int16MemoTable) Size() int { 771 sz := int(s.tbl.size) 772 if _, ok := s.GetNull(); ok { 773 sz++ 774 } 775 return sz 776 } 777 778 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 779 // that will be true if found and false if not. 780 func (s *Int16MemoTable) GetNull() (int, bool) { 781 return int(s.nullIdx), s.nullIdx != KeyNotFound 782 } 783 784 // GetOrInsertNull will return the index of the null entry or insert a null entry 785 // if one currently doesn't exist. The found value will be true if there was already 786 // a null in the table, and false if it inserted one. 787 func (s *Int16MemoTable) GetOrInsertNull() (idx int, found bool) { 788 idx, found = s.GetNull() 789 if !found { 790 idx = s.Size() 791 s.nullIdx = int32(idx) 792 } 793 return 794 } 795 796 // CopyValues will copy the values from the memo table out into the passed in slice 797 // which must be of the appropriate type. 798 func (s *Int16MemoTable) CopyValues(out interface{}) { 799 s.CopyValuesSubset(0, out) 800 } 801 802 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 803 // at the provided start index 804 func (s *Int16MemoTable) CopyValuesSubset(start int, out interface{}) { 805 s.tbl.CopyValuesSubset(start, out.([]int16)) 806 } 807 808 func (s *Int16MemoTable) WriteOut(out []byte) { 809 s.tbl.CopyValues(arrow.Int16Traits.CastFromBytes(out)) 810 } 811 812 func (s *Int16MemoTable) WriteOutSubset(start int, out []byte) { 813 s.tbl.CopyValuesSubset(start, arrow.Int16Traits.CastFromBytes(out)) 814 } 815 816 func (s *Int16MemoTable) WriteOutLE(out []byte) { 817 s.tbl.WriteOut(out) 818 } 819 820 func (s *Int16MemoTable) WriteOutSubsetLE(start int, out []byte) { 821 s.tbl.WriteOutSubset(start, out) 822 } 823 824 // Get returns the index of the requested value in the hash table or KeyNotFound 825 // along with a boolean indicating if it was found or not. 826 func (s *Int16MemoTable) Get(val interface{}) (int, bool) { 827 828 h := hashInt(uint64(val.(int16)), 0) 829 if e, ok := s.tbl.Lookup(h, func(v int16) bool { return val.(int16) == v }); ok { 830 return int(e.payload.memoIdx), ok 831 } 832 return KeyNotFound, false 833 } 834 835 // GetOrInsert will return the index of the specified value in the table, or insert the 836 // value into the table and return the new index. found indicates whether or not it already 837 // existed in the table (true) or was inserted by this call (false). 838 func (s *Int16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 839 840 h := hashInt(uint64(val.(int16)), 0) 841 e, ok := s.tbl.Lookup(h, func(v int16) bool { 842 return val.(int16) == v 843 }) 844 845 if ok { 846 idx = int(e.payload.memoIdx) 847 found = true 848 } else { 849 idx = s.Size() 850 s.tbl.Insert(e, h, val.(int16), int32(idx)) 851 } 852 return 853 } 854 855 // GetOrInsertBytes is unimplemented 856 func (s *Int16MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { 857 panic("unimplemented") 858 } 859 860 type payloadUint16 struct { 861 val uint16 862 memoIdx int32 863 } 864 865 type entryUint16 struct { 866 h uint64 867 payload payloadUint16 868 } 869 870 func (e entryUint16) Valid() bool { return e.h != sentinel } 871 872 // Uint16HashTable is a hashtable specifically for uint16 that 873 // is utilized with the MemoTable to generalize interactions for easier 874 // implementation of dictionaries without losing performance. 875 type Uint16HashTable struct { 876 cap uint64 877 capMask uint64 878 size uint64 879 880 entries []entryUint16 881 } 882 883 // NewUint16HashTable returns a new hash table for uint16 values 884 // initialized with the passed in capacity or 32 whichever is larger. 885 func NewUint16HashTable(cap uint64) *Uint16HashTable { 886 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 887 ret := &Uint16HashTable{cap: initCap, capMask: initCap - 1, size: 0} 888 ret.entries = make([]entryUint16, initCap) 889 return ret 890 } 891 892 // Reset drops all of the values in this hash table and re-initializes it 893 // with the specified initial capacity as if by calling New, but without having 894 // to reallocate the object. 895 func (h *Uint16HashTable) Reset(cap uint64) { 896 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 897 h.capMask = h.cap - 1 898 h.size = 0 899 h.entries = make([]entryUint16, h.cap) 900 } 901 902 // CopyValues is used for copying the values out of the hash table into the 903 // passed in slice, in the order that they were first inserted 904 func (h *Uint16HashTable) CopyValues(out []uint16) { 905 h.CopyValuesSubset(0, out) 906 } 907 908 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 909 // with the value at start, in the order that they were inserted. 910 func (h *Uint16HashTable) CopyValuesSubset(start int, out []uint16) { 911 h.VisitEntries(func(e *entryUint16) { 912 idx := e.payload.memoIdx - int32(start) 913 if idx >= 0 { 914 out[idx] = e.payload.val 915 } 916 }) 917 } 918 919 func (h *Uint16HashTable) WriteOut(out []byte) { 920 h.WriteOutSubset(0, out) 921 } 922 923 func (h *Uint16HashTable) WriteOutSubset(start int, out []byte) { 924 data := arrow.Uint16Traits.CastFromBytes(out) 925 h.VisitEntries(func(e *entryUint16) { 926 idx := e.payload.memoIdx - int32(start) 927 if idx >= 0 { 928 data[idx] = utils.ToLEUint16(e.payload.val) 929 } 930 }) 931 } 932 933 func (h *Uint16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 934 935 func (Uint16HashTable) fixHash(v uint64) uint64 { 936 if v == sentinel { 937 return 42 938 } 939 return v 940 } 941 942 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 943 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 944 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 945 func (h *Uint16HashTable) Lookup(v uint64, cmp func(uint16) bool) (*entryUint16, bool) { 946 idx, ok := h.lookup(v, h.capMask, cmp) 947 return &h.entries[idx], ok 948 } 949 950 func (h *Uint16HashTable) lookup(v uint64, szMask uint64, cmp func(uint16) bool) (uint64, bool) { 951 const perturbShift uint8 = 5 952 953 var ( 954 idx uint64 955 perturb uint64 956 e *entryUint16 957 ) 958 959 v = h.fixHash(v) 960 idx = v & szMask 961 perturb = (v >> uint64(perturbShift)) + 1 962 963 for { 964 e = &h.entries[idx] 965 if e.h == v && cmp(e.payload.val) { 966 return idx, true 967 } 968 969 if e.h == sentinel { 970 return idx, false 971 } 972 973 // perturbation logic inspired from CPython's set/dict object 974 // the goal is that all 64 bits of unmasked hash value eventually 975 // participate int he probing sequence, to minimize clustering 976 idx = (idx + perturb) & szMask 977 perturb = (perturb >> uint64(perturbShift)) + 1 978 } 979 } 980 981 func (h *Uint16HashTable) upsize(newcap uint64) error { 982 newMask := newcap - 1 983 984 oldEntries := h.entries 985 h.entries = make([]entryUint16, newcap) 986 for _, e := range oldEntries { 987 if e.Valid() { 988 idx, _ := h.lookup(e.h, newMask, func(uint16) bool { return false }) 989 h.entries[idx] = e 990 } 991 } 992 h.cap = newcap 993 h.capMask = newMask 994 return nil 995 } 996 997 // Insert updates the given entry with the provided hash value, payload value and memo index. 998 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 999 func (h *Uint16HashTable) Insert(e *entryUint16, v uint64, val uint16, memoIdx int32) error { 1000 e.h = h.fixHash(v) 1001 e.payload.val = val 1002 e.payload.memoIdx = memoIdx 1003 h.size++ 1004 1005 if h.needUpsize() { 1006 h.upsize(h.cap * uint64(loadFactor) * 2) 1007 } 1008 return nil 1009 } 1010 1011 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 1012 // a valid entry being one which has had a value inserted into it. 1013 func (h *Uint16HashTable) VisitEntries(visit func(*entryUint16)) { 1014 for _, e := range h.entries { 1015 if e.Valid() { 1016 visit(&e) 1017 } 1018 } 1019 } 1020 1021 // Uint16MemoTable is a wrapper over the appropriate hashtable to provide an interface 1022 // conforming to the MemoTable interface defined in the encoding package for general interactions 1023 // regarding dictionaries. 1024 type Uint16MemoTable struct { 1025 tbl *Uint16HashTable 1026 nullIdx int32 1027 } 1028 1029 // NewUint16MemoTable returns a new memotable with num entries pre-allocated to reduce further 1030 // allocations when inserting. 1031 func NewUint16MemoTable(num int64) *Uint16MemoTable { 1032 return &Uint16MemoTable{tbl: NewUint16HashTable(uint64(num)), nullIdx: KeyNotFound} 1033 } 1034 1035 func (Uint16MemoTable) TypeTraits() TypeTraits { 1036 return arrow.Uint16Traits 1037 } 1038 1039 // Reset allows this table to be re-used by dumping all the data currently in the table. 1040 func (s *Uint16MemoTable) Reset() { 1041 s.tbl.Reset(32) 1042 s.nullIdx = KeyNotFound 1043 } 1044 1045 // Size returns the current number of inserted elements into the table including if a null 1046 // has been inserted. 1047 func (s *Uint16MemoTable) Size() int { 1048 sz := int(s.tbl.size) 1049 if _, ok := s.GetNull(); ok { 1050 sz++ 1051 } 1052 return sz 1053 } 1054 1055 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 1056 // that will be true if found and false if not. 1057 func (s *Uint16MemoTable) GetNull() (int, bool) { 1058 return int(s.nullIdx), s.nullIdx != KeyNotFound 1059 } 1060 1061 // GetOrInsertNull will return the index of the null entry or insert a null entry 1062 // if one currently doesn't exist. The found value will be true if there was already 1063 // a null in the table, and false if it inserted one. 1064 func (s *Uint16MemoTable) GetOrInsertNull() (idx int, found bool) { 1065 idx, found = s.GetNull() 1066 if !found { 1067 idx = s.Size() 1068 s.nullIdx = int32(idx) 1069 } 1070 return 1071 } 1072 1073 // CopyValues will copy the values from the memo table out into the passed in slice 1074 // which must be of the appropriate type. 1075 func (s *Uint16MemoTable) CopyValues(out interface{}) { 1076 s.CopyValuesSubset(0, out) 1077 } 1078 1079 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 1080 // at the provided start index 1081 func (s *Uint16MemoTable) CopyValuesSubset(start int, out interface{}) { 1082 s.tbl.CopyValuesSubset(start, out.([]uint16)) 1083 } 1084 1085 func (s *Uint16MemoTable) WriteOut(out []byte) { 1086 s.tbl.CopyValues(arrow.Uint16Traits.CastFromBytes(out)) 1087 } 1088 1089 func (s *Uint16MemoTable) WriteOutSubset(start int, out []byte) { 1090 s.tbl.CopyValuesSubset(start, arrow.Uint16Traits.CastFromBytes(out)) 1091 } 1092 1093 func (s *Uint16MemoTable) WriteOutLE(out []byte) { 1094 s.tbl.WriteOut(out) 1095 } 1096 1097 func (s *Uint16MemoTable) WriteOutSubsetLE(start int, out []byte) { 1098 s.tbl.WriteOutSubset(start, out) 1099 } 1100 1101 // Get returns the index of the requested value in the hash table or KeyNotFound 1102 // along with a boolean indicating if it was found or not. 1103 func (s *Uint16MemoTable) Get(val interface{}) (int, bool) { 1104 1105 h := hashInt(uint64(val.(uint16)), 0) 1106 if e, ok := s.tbl.Lookup(h, func(v uint16) bool { return val.(uint16) == v }); ok { 1107 return int(e.payload.memoIdx), ok 1108 } 1109 return KeyNotFound, false 1110 } 1111 1112 // GetOrInsert will return the index of the specified value in the table, or insert the 1113 // value into the table and return the new index. found indicates whether or not it already 1114 // existed in the table (true) or was inserted by this call (false). 1115 func (s *Uint16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 1116 1117 h := hashInt(uint64(val.(uint16)), 0) 1118 e, ok := s.tbl.Lookup(h, func(v uint16) bool { 1119 return val.(uint16) == v 1120 }) 1121 1122 if ok { 1123 idx = int(e.payload.memoIdx) 1124 found = true 1125 } else { 1126 idx = s.Size() 1127 s.tbl.Insert(e, h, val.(uint16), int32(idx)) 1128 } 1129 return 1130 } 1131 1132 // GetOrInsertBytes is unimplemented 1133 func (s *Uint16MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { 1134 panic("unimplemented") 1135 } 1136 1137 type payloadInt32 struct { 1138 val int32 1139 memoIdx int32 1140 } 1141 1142 type entryInt32 struct { 1143 h uint64 1144 payload payloadInt32 1145 } 1146 1147 func (e entryInt32) Valid() bool { return e.h != sentinel } 1148 1149 // Int32HashTable is a hashtable specifically for int32 that 1150 // is utilized with the MemoTable to generalize interactions for easier 1151 // implementation of dictionaries without losing performance. 1152 type Int32HashTable struct { 1153 cap uint64 1154 capMask uint64 1155 size uint64 1156 1157 entries []entryInt32 1158 } 1159 1160 // NewInt32HashTable returns a new hash table for int32 values 1161 // initialized with the passed in capacity or 32 whichever is larger. 1162 func NewInt32HashTable(cap uint64) *Int32HashTable { 1163 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1164 ret := &Int32HashTable{cap: initCap, capMask: initCap - 1, size: 0} 1165 ret.entries = make([]entryInt32, initCap) 1166 return ret 1167 } 1168 1169 // Reset drops all of the values in this hash table and re-initializes it 1170 // with the specified initial capacity as if by calling New, but without having 1171 // to reallocate the object. 1172 func (h *Int32HashTable) Reset(cap uint64) { 1173 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1174 h.capMask = h.cap - 1 1175 h.size = 0 1176 h.entries = make([]entryInt32, h.cap) 1177 } 1178 1179 // CopyValues is used for copying the values out of the hash table into the 1180 // passed in slice, in the order that they were first inserted 1181 func (h *Int32HashTable) CopyValues(out []int32) { 1182 h.CopyValuesSubset(0, out) 1183 } 1184 1185 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 1186 // with the value at start, in the order that they were inserted. 1187 func (h *Int32HashTable) CopyValuesSubset(start int, out []int32) { 1188 h.VisitEntries(func(e *entryInt32) { 1189 idx := e.payload.memoIdx - int32(start) 1190 if idx >= 0 { 1191 out[idx] = e.payload.val 1192 } 1193 }) 1194 } 1195 1196 func (h *Int32HashTable) WriteOut(out []byte) { 1197 h.WriteOutSubset(0, out) 1198 } 1199 1200 func (h *Int32HashTable) WriteOutSubset(start int, out []byte) { 1201 data := arrow.Int32Traits.CastFromBytes(out) 1202 h.VisitEntries(func(e *entryInt32) { 1203 idx := e.payload.memoIdx - int32(start) 1204 if idx >= 0 { 1205 data[idx] = utils.ToLEInt32(e.payload.val) 1206 } 1207 }) 1208 } 1209 1210 func (h *Int32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 1211 1212 func (Int32HashTable) fixHash(v uint64) uint64 { 1213 if v == sentinel { 1214 return 42 1215 } 1216 return v 1217 } 1218 1219 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 1220 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 1221 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 1222 func (h *Int32HashTable) Lookup(v uint64, cmp func(int32) bool) (*entryInt32, bool) { 1223 idx, ok := h.lookup(v, h.capMask, cmp) 1224 return &h.entries[idx], ok 1225 } 1226 1227 func (h *Int32HashTable) lookup(v uint64, szMask uint64, cmp func(int32) bool) (uint64, bool) { 1228 const perturbShift uint8 = 5 1229 1230 var ( 1231 idx uint64 1232 perturb uint64 1233 e *entryInt32 1234 ) 1235 1236 v = h.fixHash(v) 1237 idx = v & szMask 1238 perturb = (v >> uint64(perturbShift)) + 1 1239 1240 for { 1241 e = &h.entries[idx] 1242 if e.h == v && cmp(e.payload.val) { 1243 return idx, true 1244 } 1245 1246 if e.h == sentinel { 1247 return idx, false 1248 } 1249 1250 // perturbation logic inspired from CPython's set/dict object 1251 // the goal is that all 64 bits of unmasked hash value eventually 1252 // participate int he probing sequence, to minimize clustering 1253 idx = (idx + perturb) & szMask 1254 perturb = (perturb >> uint64(perturbShift)) + 1 1255 } 1256 } 1257 1258 func (h *Int32HashTable) upsize(newcap uint64) error { 1259 newMask := newcap - 1 1260 1261 oldEntries := h.entries 1262 h.entries = make([]entryInt32, newcap) 1263 for _, e := range oldEntries { 1264 if e.Valid() { 1265 idx, _ := h.lookup(e.h, newMask, func(int32) bool { return false }) 1266 h.entries[idx] = e 1267 } 1268 } 1269 h.cap = newcap 1270 h.capMask = newMask 1271 return nil 1272 } 1273 1274 // Insert updates the given entry with the provided hash value, payload value and memo index. 1275 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 1276 func (h *Int32HashTable) Insert(e *entryInt32, v uint64, val int32, memoIdx int32) error { 1277 e.h = h.fixHash(v) 1278 e.payload.val = val 1279 e.payload.memoIdx = memoIdx 1280 h.size++ 1281 1282 if h.needUpsize() { 1283 h.upsize(h.cap * uint64(loadFactor) * 2) 1284 } 1285 return nil 1286 } 1287 1288 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 1289 // a valid entry being one which has had a value inserted into it. 1290 func (h *Int32HashTable) VisitEntries(visit func(*entryInt32)) { 1291 for _, e := range h.entries { 1292 if e.Valid() { 1293 visit(&e) 1294 } 1295 } 1296 } 1297 1298 // Int32MemoTable is a wrapper over the appropriate hashtable to provide an interface 1299 // conforming to the MemoTable interface defined in the encoding package for general interactions 1300 // regarding dictionaries. 1301 type Int32MemoTable struct { 1302 tbl *Int32HashTable 1303 nullIdx int32 1304 } 1305 1306 // NewInt32MemoTable returns a new memotable with num entries pre-allocated to reduce further 1307 // allocations when inserting. 1308 func NewInt32MemoTable(num int64) *Int32MemoTable { 1309 return &Int32MemoTable{tbl: NewInt32HashTable(uint64(num)), nullIdx: KeyNotFound} 1310 } 1311 1312 func (Int32MemoTable) TypeTraits() TypeTraits { 1313 return arrow.Int32Traits 1314 } 1315 1316 // Reset allows this table to be re-used by dumping all the data currently in the table. 1317 func (s *Int32MemoTable) Reset() { 1318 s.tbl.Reset(32) 1319 s.nullIdx = KeyNotFound 1320 } 1321 1322 // Size returns the current number of inserted elements into the table including if a null 1323 // has been inserted. 1324 func (s *Int32MemoTable) Size() int { 1325 sz := int(s.tbl.size) 1326 if _, ok := s.GetNull(); ok { 1327 sz++ 1328 } 1329 return sz 1330 } 1331 1332 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 1333 // that will be true if found and false if not. 1334 func (s *Int32MemoTable) GetNull() (int, bool) { 1335 return int(s.nullIdx), s.nullIdx != KeyNotFound 1336 } 1337 1338 // GetOrInsertNull will return the index of the null entry or insert a null entry 1339 // if one currently doesn't exist. The found value will be true if there was already 1340 // a null in the table, and false if it inserted one. 1341 func (s *Int32MemoTable) GetOrInsertNull() (idx int, found bool) { 1342 idx, found = s.GetNull() 1343 if !found { 1344 idx = s.Size() 1345 s.nullIdx = int32(idx) 1346 } 1347 return 1348 } 1349 1350 // CopyValues will copy the values from the memo table out into the passed in slice 1351 // which must be of the appropriate type. 1352 func (s *Int32MemoTable) CopyValues(out interface{}) { 1353 s.CopyValuesSubset(0, out) 1354 } 1355 1356 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 1357 // at the provided start index 1358 func (s *Int32MemoTable) CopyValuesSubset(start int, out interface{}) { 1359 s.tbl.CopyValuesSubset(start, out.([]int32)) 1360 } 1361 1362 func (s *Int32MemoTable) WriteOut(out []byte) { 1363 s.tbl.CopyValues(arrow.Int32Traits.CastFromBytes(out)) 1364 } 1365 1366 func (s *Int32MemoTable) WriteOutSubset(start int, out []byte) { 1367 s.tbl.CopyValuesSubset(start, arrow.Int32Traits.CastFromBytes(out)) 1368 } 1369 1370 func (s *Int32MemoTable) WriteOutLE(out []byte) { 1371 s.tbl.WriteOut(out) 1372 } 1373 1374 func (s *Int32MemoTable) WriteOutSubsetLE(start int, out []byte) { 1375 s.tbl.WriteOutSubset(start, out) 1376 } 1377 1378 // Get returns the index of the requested value in the hash table or KeyNotFound 1379 // along with a boolean indicating if it was found or not. 1380 func (s *Int32MemoTable) Get(val interface{}) (int, bool) { 1381 1382 h := hashInt(uint64(val.(int32)), 0) 1383 if e, ok := s.tbl.Lookup(h, func(v int32) bool { return val.(int32) == v }); ok { 1384 return int(e.payload.memoIdx), ok 1385 } 1386 return KeyNotFound, false 1387 } 1388 1389 // GetOrInsert will return the index of the specified value in the table, or insert the 1390 // value into the table and return the new index. found indicates whether or not it already 1391 // existed in the table (true) or was inserted by this call (false). 1392 func (s *Int32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 1393 1394 h := hashInt(uint64(val.(int32)), 0) 1395 e, ok := s.tbl.Lookup(h, func(v int32) bool { 1396 return val.(int32) == v 1397 }) 1398 1399 if ok { 1400 idx = int(e.payload.memoIdx) 1401 found = true 1402 } else { 1403 idx = s.Size() 1404 s.tbl.Insert(e, h, val.(int32), int32(idx)) 1405 } 1406 return 1407 } 1408 1409 // GetOrInsertBytes is unimplemented 1410 func (s *Int32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { 1411 panic("unimplemented") 1412 } 1413 1414 type payloadInt64 struct { 1415 val int64 1416 memoIdx int32 1417 } 1418 1419 type entryInt64 struct { 1420 h uint64 1421 payload payloadInt64 1422 } 1423 1424 func (e entryInt64) Valid() bool { return e.h != sentinel } 1425 1426 // Int64HashTable is a hashtable specifically for int64 that 1427 // is utilized with the MemoTable to generalize interactions for easier 1428 // implementation of dictionaries without losing performance. 1429 type Int64HashTable struct { 1430 cap uint64 1431 capMask uint64 1432 size uint64 1433 1434 entries []entryInt64 1435 } 1436 1437 // NewInt64HashTable returns a new hash table for int64 values 1438 // initialized with the passed in capacity or 32 whichever is larger. 1439 func NewInt64HashTable(cap uint64) *Int64HashTable { 1440 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1441 ret := &Int64HashTable{cap: initCap, capMask: initCap - 1, size: 0} 1442 ret.entries = make([]entryInt64, initCap) 1443 return ret 1444 } 1445 1446 // Reset drops all of the values in this hash table and re-initializes it 1447 // with the specified initial capacity as if by calling New, but without having 1448 // to reallocate the object. 1449 func (h *Int64HashTable) Reset(cap uint64) { 1450 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1451 h.capMask = h.cap - 1 1452 h.size = 0 1453 h.entries = make([]entryInt64, h.cap) 1454 } 1455 1456 // CopyValues is used for copying the values out of the hash table into the 1457 // passed in slice, in the order that they were first inserted 1458 func (h *Int64HashTable) CopyValues(out []int64) { 1459 h.CopyValuesSubset(0, out) 1460 } 1461 1462 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 1463 // with the value at start, in the order that they were inserted. 1464 func (h *Int64HashTable) CopyValuesSubset(start int, out []int64) { 1465 h.VisitEntries(func(e *entryInt64) { 1466 idx := e.payload.memoIdx - int32(start) 1467 if idx >= 0 { 1468 out[idx] = e.payload.val 1469 } 1470 }) 1471 } 1472 1473 func (h *Int64HashTable) WriteOut(out []byte) { 1474 h.WriteOutSubset(0, out) 1475 } 1476 1477 func (h *Int64HashTable) WriteOutSubset(start int, out []byte) { 1478 data := arrow.Int64Traits.CastFromBytes(out) 1479 h.VisitEntries(func(e *entryInt64) { 1480 idx := e.payload.memoIdx - int32(start) 1481 if idx >= 0 { 1482 data[idx] = utils.ToLEInt64(e.payload.val) 1483 } 1484 }) 1485 } 1486 1487 func (h *Int64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 1488 1489 func (Int64HashTable) fixHash(v uint64) uint64 { 1490 if v == sentinel { 1491 return 42 1492 } 1493 return v 1494 } 1495 1496 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 1497 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 1498 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 1499 func (h *Int64HashTable) Lookup(v uint64, cmp func(int64) bool) (*entryInt64, bool) { 1500 idx, ok := h.lookup(v, h.capMask, cmp) 1501 return &h.entries[idx], ok 1502 } 1503 1504 func (h *Int64HashTable) lookup(v uint64, szMask uint64, cmp func(int64) bool) (uint64, bool) { 1505 const perturbShift uint8 = 5 1506 1507 var ( 1508 idx uint64 1509 perturb uint64 1510 e *entryInt64 1511 ) 1512 1513 v = h.fixHash(v) 1514 idx = v & szMask 1515 perturb = (v >> uint64(perturbShift)) + 1 1516 1517 for { 1518 e = &h.entries[idx] 1519 if e.h == v && cmp(e.payload.val) { 1520 return idx, true 1521 } 1522 1523 if e.h == sentinel { 1524 return idx, false 1525 } 1526 1527 // perturbation logic inspired from CPython's set/dict object 1528 // the goal is that all 64 bits of unmasked hash value eventually 1529 // participate int he probing sequence, to minimize clustering 1530 idx = (idx + perturb) & szMask 1531 perturb = (perturb >> uint64(perturbShift)) + 1 1532 } 1533 } 1534 1535 func (h *Int64HashTable) upsize(newcap uint64) error { 1536 newMask := newcap - 1 1537 1538 oldEntries := h.entries 1539 h.entries = make([]entryInt64, newcap) 1540 for _, e := range oldEntries { 1541 if e.Valid() { 1542 idx, _ := h.lookup(e.h, newMask, func(int64) bool { return false }) 1543 h.entries[idx] = e 1544 } 1545 } 1546 h.cap = newcap 1547 h.capMask = newMask 1548 return nil 1549 } 1550 1551 // Insert updates the given entry with the provided hash value, payload value and memo index. 1552 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 1553 func (h *Int64HashTable) Insert(e *entryInt64, v uint64, val int64, memoIdx int32) error { 1554 e.h = h.fixHash(v) 1555 e.payload.val = val 1556 e.payload.memoIdx = memoIdx 1557 h.size++ 1558 1559 if h.needUpsize() { 1560 h.upsize(h.cap * uint64(loadFactor) * 2) 1561 } 1562 return nil 1563 } 1564 1565 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 1566 // a valid entry being one which has had a value inserted into it. 1567 func (h *Int64HashTable) VisitEntries(visit func(*entryInt64)) { 1568 for _, e := range h.entries { 1569 if e.Valid() { 1570 visit(&e) 1571 } 1572 } 1573 } 1574 1575 // Int64MemoTable is a wrapper over the appropriate hashtable to provide an interface 1576 // conforming to the MemoTable interface defined in the encoding package for general interactions 1577 // regarding dictionaries. 1578 type Int64MemoTable struct { 1579 tbl *Int64HashTable 1580 nullIdx int32 1581 } 1582 1583 // NewInt64MemoTable returns a new memotable with num entries pre-allocated to reduce further 1584 // allocations when inserting. 1585 func NewInt64MemoTable(num int64) *Int64MemoTable { 1586 return &Int64MemoTable{tbl: NewInt64HashTable(uint64(num)), nullIdx: KeyNotFound} 1587 } 1588 1589 func (Int64MemoTable) TypeTraits() TypeTraits { 1590 return arrow.Int64Traits 1591 } 1592 1593 // Reset allows this table to be re-used by dumping all the data currently in the table. 1594 func (s *Int64MemoTable) Reset() { 1595 s.tbl.Reset(32) 1596 s.nullIdx = KeyNotFound 1597 } 1598 1599 // Size returns the current number of inserted elements into the table including if a null 1600 // has been inserted. 1601 func (s *Int64MemoTable) Size() int { 1602 sz := int(s.tbl.size) 1603 if _, ok := s.GetNull(); ok { 1604 sz++ 1605 } 1606 return sz 1607 } 1608 1609 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 1610 // that will be true if found and false if not. 1611 func (s *Int64MemoTable) GetNull() (int, bool) { 1612 return int(s.nullIdx), s.nullIdx != KeyNotFound 1613 } 1614 1615 // GetOrInsertNull will return the index of the null entry or insert a null entry 1616 // if one currently doesn't exist. The found value will be true if there was already 1617 // a null in the table, and false if it inserted one. 1618 func (s *Int64MemoTable) GetOrInsertNull() (idx int, found bool) { 1619 idx, found = s.GetNull() 1620 if !found { 1621 idx = s.Size() 1622 s.nullIdx = int32(idx) 1623 } 1624 return 1625 } 1626 1627 // CopyValues will copy the values from the memo table out into the passed in slice 1628 // which must be of the appropriate type. 1629 func (s *Int64MemoTable) CopyValues(out interface{}) { 1630 s.CopyValuesSubset(0, out) 1631 } 1632 1633 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 1634 // at the provided start index 1635 func (s *Int64MemoTable) CopyValuesSubset(start int, out interface{}) { 1636 s.tbl.CopyValuesSubset(start, out.([]int64)) 1637 } 1638 1639 func (s *Int64MemoTable) WriteOut(out []byte) { 1640 s.tbl.CopyValues(arrow.Int64Traits.CastFromBytes(out)) 1641 } 1642 1643 func (s *Int64MemoTable) WriteOutSubset(start int, out []byte) { 1644 s.tbl.CopyValuesSubset(start, arrow.Int64Traits.CastFromBytes(out)) 1645 } 1646 1647 func (s *Int64MemoTable) WriteOutLE(out []byte) { 1648 s.tbl.WriteOut(out) 1649 } 1650 1651 func (s *Int64MemoTable) WriteOutSubsetLE(start int, out []byte) { 1652 s.tbl.WriteOutSubset(start, out) 1653 } 1654 1655 // Get returns the index of the requested value in the hash table or KeyNotFound 1656 // along with a boolean indicating if it was found or not. 1657 func (s *Int64MemoTable) Get(val interface{}) (int, bool) { 1658 1659 h := hashInt(uint64(val.(int64)), 0) 1660 if e, ok := s.tbl.Lookup(h, func(v int64) bool { return val.(int64) == v }); ok { 1661 return int(e.payload.memoIdx), ok 1662 } 1663 return KeyNotFound, false 1664 } 1665 1666 // GetOrInsert will return the index of the specified value in the table, or insert the 1667 // value into the table and return the new index. found indicates whether or not it already 1668 // existed in the table (true) or was inserted by this call (false). 1669 func (s *Int64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 1670 1671 h := hashInt(uint64(val.(int64)), 0) 1672 e, ok := s.tbl.Lookup(h, func(v int64) bool { 1673 return val.(int64) == v 1674 }) 1675 1676 if ok { 1677 idx = int(e.payload.memoIdx) 1678 found = true 1679 } else { 1680 idx = s.Size() 1681 s.tbl.Insert(e, h, val.(int64), int32(idx)) 1682 } 1683 return 1684 } 1685 1686 // GetOrInsertBytes is unimplemented 1687 func (s *Int64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { 1688 panic("unimplemented") 1689 } 1690 1691 type payloadUint32 struct { 1692 val uint32 1693 memoIdx int32 1694 } 1695 1696 type entryUint32 struct { 1697 h uint64 1698 payload payloadUint32 1699 } 1700 1701 func (e entryUint32) Valid() bool { return e.h != sentinel } 1702 1703 // Uint32HashTable is a hashtable specifically for uint32 that 1704 // is utilized with the MemoTable to generalize interactions for easier 1705 // implementation of dictionaries without losing performance. 1706 type Uint32HashTable struct { 1707 cap uint64 1708 capMask uint64 1709 size uint64 1710 1711 entries []entryUint32 1712 } 1713 1714 // NewUint32HashTable returns a new hash table for uint32 values 1715 // initialized with the passed in capacity or 32 whichever is larger. 1716 func NewUint32HashTable(cap uint64) *Uint32HashTable { 1717 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1718 ret := &Uint32HashTable{cap: initCap, capMask: initCap - 1, size: 0} 1719 ret.entries = make([]entryUint32, initCap) 1720 return ret 1721 } 1722 1723 // Reset drops all of the values in this hash table and re-initializes it 1724 // with the specified initial capacity as if by calling New, but without having 1725 // to reallocate the object. 1726 func (h *Uint32HashTable) Reset(cap uint64) { 1727 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1728 h.capMask = h.cap - 1 1729 h.size = 0 1730 h.entries = make([]entryUint32, h.cap) 1731 } 1732 1733 // CopyValues is used for copying the values out of the hash table into the 1734 // passed in slice, in the order that they were first inserted 1735 func (h *Uint32HashTable) CopyValues(out []uint32) { 1736 h.CopyValuesSubset(0, out) 1737 } 1738 1739 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 1740 // with the value at start, in the order that they were inserted. 1741 func (h *Uint32HashTable) CopyValuesSubset(start int, out []uint32) { 1742 h.VisitEntries(func(e *entryUint32) { 1743 idx := e.payload.memoIdx - int32(start) 1744 if idx >= 0 { 1745 out[idx] = e.payload.val 1746 } 1747 }) 1748 } 1749 1750 func (h *Uint32HashTable) WriteOut(out []byte) { 1751 h.WriteOutSubset(0, out) 1752 } 1753 1754 func (h *Uint32HashTable) WriteOutSubset(start int, out []byte) { 1755 data := arrow.Uint32Traits.CastFromBytes(out) 1756 h.VisitEntries(func(e *entryUint32) { 1757 idx := e.payload.memoIdx - int32(start) 1758 if idx >= 0 { 1759 data[idx] = utils.ToLEUint32(e.payload.val) 1760 } 1761 }) 1762 } 1763 1764 func (h *Uint32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 1765 1766 func (Uint32HashTable) fixHash(v uint64) uint64 { 1767 if v == sentinel { 1768 return 42 1769 } 1770 return v 1771 } 1772 1773 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 1774 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 1775 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 1776 func (h *Uint32HashTable) Lookup(v uint64, cmp func(uint32) bool) (*entryUint32, bool) { 1777 idx, ok := h.lookup(v, h.capMask, cmp) 1778 return &h.entries[idx], ok 1779 } 1780 1781 func (h *Uint32HashTable) lookup(v uint64, szMask uint64, cmp func(uint32) bool) (uint64, bool) { 1782 const perturbShift uint8 = 5 1783 1784 var ( 1785 idx uint64 1786 perturb uint64 1787 e *entryUint32 1788 ) 1789 1790 v = h.fixHash(v) 1791 idx = v & szMask 1792 perturb = (v >> uint64(perturbShift)) + 1 1793 1794 for { 1795 e = &h.entries[idx] 1796 if e.h == v && cmp(e.payload.val) { 1797 return idx, true 1798 } 1799 1800 if e.h == sentinel { 1801 return idx, false 1802 } 1803 1804 // perturbation logic inspired from CPython's set/dict object 1805 // the goal is that all 64 bits of unmasked hash value eventually 1806 // participate int he probing sequence, to minimize clustering 1807 idx = (idx + perturb) & szMask 1808 perturb = (perturb >> uint64(perturbShift)) + 1 1809 } 1810 } 1811 1812 func (h *Uint32HashTable) upsize(newcap uint64) error { 1813 newMask := newcap - 1 1814 1815 oldEntries := h.entries 1816 h.entries = make([]entryUint32, newcap) 1817 for _, e := range oldEntries { 1818 if e.Valid() { 1819 idx, _ := h.lookup(e.h, newMask, func(uint32) bool { return false }) 1820 h.entries[idx] = e 1821 } 1822 } 1823 h.cap = newcap 1824 h.capMask = newMask 1825 return nil 1826 } 1827 1828 // Insert updates the given entry with the provided hash value, payload value and memo index. 1829 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 1830 func (h *Uint32HashTable) Insert(e *entryUint32, v uint64, val uint32, memoIdx int32) error { 1831 e.h = h.fixHash(v) 1832 e.payload.val = val 1833 e.payload.memoIdx = memoIdx 1834 h.size++ 1835 1836 if h.needUpsize() { 1837 h.upsize(h.cap * uint64(loadFactor) * 2) 1838 } 1839 return nil 1840 } 1841 1842 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 1843 // a valid entry being one which has had a value inserted into it. 1844 func (h *Uint32HashTable) VisitEntries(visit func(*entryUint32)) { 1845 for _, e := range h.entries { 1846 if e.Valid() { 1847 visit(&e) 1848 } 1849 } 1850 } 1851 1852 // Uint32MemoTable is a wrapper over the appropriate hashtable to provide an interface 1853 // conforming to the MemoTable interface defined in the encoding package for general interactions 1854 // regarding dictionaries. 1855 type Uint32MemoTable struct { 1856 tbl *Uint32HashTable 1857 nullIdx int32 1858 } 1859 1860 // NewUint32MemoTable returns a new memotable with num entries pre-allocated to reduce further 1861 // allocations when inserting. 1862 func NewUint32MemoTable(num int64) *Uint32MemoTable { 1863 return &Uint32MemoTable{tbl: NewUint32HashTable(uint64(num)), nullIdx: KeyNotFound} 1864 } 1865 1866 func (Uint32MemoTable) TypeTraits() TypeTraits { 1867 return arrow.Uint32Traits 1868 } 1869 1870 // Reset allows this table to be re-used by dumping all the data currently in the table. 1871 func (s *Uint32MemoTable) Reset() { 1872 s.tbl.Reset(32) 1873 s.nullIdx = KeyNotFound 1874 } 1875 1876 // Size returns the current number of inserted elements into the table including if a null 1877 // has been inserted. 1878 func (s *Uint32MemoTable) Size() int { 1879 sz := int(s.tbl.size) 1880 if _, ok := s.GetNull(); ok { 1881 sz++ 1882 } 1883 return sz 1884 } 1885 1886 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 1887 // that will be true if found and false if not. 1888 func (s *Uint32MemoTable) GetNull() (int, bool) { 1889 return int(s.nullIdx), s.nullIdx != KeyNotFound 1890 } 1891 1892 // GetOrInsertNull will return the index of the null entry or insert a null entry 1893 // if one currently doesn't exist. The found value will be true if there was already 1894 // a null in the table, and false if it inserted one. 1895 func (s *Uint32MemoTable) GetOrInsertNull() (idx int, found bool) { 1896 idx, found = s.GetNull() 1897 if !found { 1898 idx = s.Size() 1899 s.nullIdx = int32(idx) 1900 } 1901 return 1902 } 1903 1904 // CopyValues will copy the values from the memo table out into the passed in slice 1905 // which must be of the appropriate type. 1906 func (s *Uint32MemoTable) CopyValues(out interface{}) { 1907 s.CopyValuesSubset(0, out) 1908 } 1909 1910 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 1911 // at the provided start index 1912 func (s *Uint32MemoTable) CopyValuesSubset(start int, out interface{}) { 1913 s.tbl.CopyValuesSubset(start, out.([]uint32)) 1914 } 1915 1916 func (s *Uint32MemoTable) WriteOut(out []byte) { 1917 s.tbl.CopyValues(arrow.Uint32Traits.CastFromBytes(out)) 1918 } 1919 1920 func (s *Uint32MemoTable) WriteOutSubset(start int, out []byte) { 1921 s.tbl.CopyValuesSubset(start, arrow.Uint32Traits.CastFromBytes(out)) 1922 } 1923 1924 func (s *Uint32MemoTable) WriteOutLE(out []byte) { 1925 s.tbl.WriteOut(out) 1926 } 1927 1928 func (s *Uint32MemoTable) WriteOutSubsetLE(start int, out []byte) { 1929 s.tbl.WriteOutSubset(start, out) 1930 } 1931 1932 // Get returns the index of the requested value in the hash table or KeyNotFound 1933 // along with a boolean indicating if it was found or not. 1934 func (s *Uint32MemoTable) Get(val interface{}) (int, bool) { 1935 1936 h := hashInt(uint64(val.(uint32)), 0) 1937 if e, ok := s.tbl.Lookup(h, func(v uint32) bool { return val.(uint32) == v }); ok { 1938 return int(e.payload.memoIdx), ok 1939 } 1940 return KeyNotFound, false 1941 } 1942 1943 // GetOrInsert will return the index of the specified value in the table, or insert the 1944 // value into the table and return the new index. found indicates whether or not it already 1945 // existed in the table (true) or was inserted by this call (false). 1946 func (s *Uint32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 1947 1948 h := hashInt(uint64(val.(uint32)), 0) 1949 e, ok := s.tbl.Lookup(h, func(v uint32) bool { 1950 return val.(uint32) == v 1951 }) 1952 1953 if ok { 1954 idx = int(e.payload.memoIdx) 1955 found = true 1956 } else { 1957 idx = s.Size() 1958 s.tbl.Insert(e, h, val.(uint32), int32(idx)) 1959 } 1960 return 1961 } 1962 1963 // GetOrInsertBytes is unimplemented 1964 func (s *Uint32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { 1965 panic("unimplemented") 1966 } 1967 1968 type payloadUint64 struct { 1969 val uint64 1970 memoIdx int32 1971 } 1972 1973 type entryUint64 struct { 1974 h uint64 1975 payload payloadUint64 1976 } 1977 1978 func (e entryUint64) Valid() bool { return e.h != sentinel } 1979 1980 // Uint64HashTable is a hashtable specifically for uint64 that 1981 // is utilized with the MemoTable to generalize interactions for easier 1982 // implementation of dictionaries without losing performance. 1983 type Uint64HashTable struct { 1984 cap uint64 1985 capMask uint64 1986 size uint64 1987 1988 entries []entryUint64 1989 } 1990 1991 // NewUint64HashTable returns a new hash table for uint64 values 1992 // initialized with the passed in capacity or 32 whichever is larger. 1993 func NewUint64HashTable(cap uint64) *Uint64HashTable { 1994 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 1995 ret := &Uint64HashTable{cap: initCap, capMask: initCap - 1, size: 0} 1996 ret.entries = make([]entryUint64, initCap) 1997 return ret 1998 } 1999 2000 // Reset drops all of the values in this hash table and re-initializes it 2001 // with the specified initial capacity as if by calling New, but without having 2002 // to reallocate the object. 2003 func (h *Uint64HashTable) Reset(cap uint64) { 2004 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 2005 h.capMask = h.cap - 1 2006 h.size = 0 2007 h.entries = make([]entryUint64, h.cap) 2008 } 2009 2010 // CopyValues is used for copying the values out of the hash table into the 2011 // passed in slice, in the order that they were first inserted 2012 func (h *Uint64HashTable) CopyValues(out []uint64) { 2013 h.CopyValuesSubset(0, out) 2014 } 2015 2016 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 2017 // with the value at start, in the order that they were inserted. 2018 func (h *Uint64HashTable) CopyValuesSubset(start int, out []uint64) { 2019 h.VisitEntries(func(e *entryUint64) { 2020 idx := e.payload.memoIdx - int32(start) 2021 if idx >= 0 { 2022 out[idx] = e.payload.val 2023 } 2024 }) 2025 } 2026 2027 func (h *Uint64HashTable) WriteOut(out []byte) { 2028 h.WriteOutSubset(0, out) 2029 } 2030 2031 func (h *Uint64HashTable) WriteOutSubset(start int, out []byte) { 2032 data := arrow.Uint64Traits.CastFromBytes(out) 2033 h.VisitEntries(func(e *entryUint64) { 2034 idx := e.payload.memoIdx - int32(start) 2035 if idx >= 0 { 2036 data[idx] = utils.ToLEUint64(e.payload.val) 2037 } 2038 }) 2039 } 2040 2041 func (h *Uint64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 2042 2043 func (Uint64HashTable) fixHash(v uint64) uint64 { 2044 if v == sentinel { 2045 return 42 2046 } 2047 return v 2048 } 2049 2050 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 2051 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 2052 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 2053 func (h *Uint64HashTable) Lookup(v uint64, cmp func(uint64) bool) (*entryUint64, bool) { 2054 idx, ok := h.lookup(v, h.capMask, cmp) 2055 return &h.entries[idx], ok 2056 } 2057 2058 func (h *Uint64HashTable) lookup(v uint64, szMask uint64, cmp func(uint64) bool) (uint64, bool) { 2059 const perturbShift uint8 = 5 2060 2061 var ( 2062 idx uint64 2063 perturb uint64 2064 e *entryUint64 2065 ) 2066 2067 v = h.fixHash(v) 2068 idx = v & szMask 2069 perturb = (v >> uint64(perturbShift)) + 1 2070 2071 for { 2072 e = &h.entries[idx] 2073 if e.h == v && cmp(e.payload.val) { 2074 return idx, true 2075 } 2076 2077 if e.h == sentinel { 2078 return idx, false 2079 } 2080 2081 // perturbation logic inspired from CPython's set/dict object 2082 // the goal is that all 64 bits of unmasked hash value eventually 2083 // participate int he probing sequence, to minimize clustering 2084 idx = (idx + perturb) & szMask 2085 perturb = (perturb >> uint64(perturbShift)) + 1 2086 } 2087 } 2088 2089 func (h *Uint64HashTable) upsize(newcap uint64) error { 2090 newMask := newcap - 1 2091 2092 oldEntries := h.entries 2093 h.entries = make([]entryUint64, newcap) 2094 for _, e := range oldEntries { 2095 if e.Valid() { 2096 idx, _ := h.lookup(e.h, newMask, func(uint64) bool { return false }) 2097 h.entries[idx] = e 2098 } 2099 } 2100 h.cap = newcap 2101 h.capMask = newMask 2102 return nil 2103 } 2104 2105 // Insert updates the given entry with the provided hash value, payload value and memo index. 2106 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 2107 func (h *Uint64HashTable) Insert(e *entryUint64, v uint64, val uint64, memoIdx int32) error { 2108 e.h = h.fixHash(v) 2109 e.payload.val = val 2110 e.payload.memoIdx = memoIdx 2111 h.size++ 2112 2113 if h.needUpsize() { 2114 h.upsize(h.cap * uint64(loadFactor) * 2) 2115 } 2116 return nil 2117 } 2118 2119 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 2120 // a valid entry being one which has had a value inserted into it. 2121 func (h *Uint64HashTable) VisitEntries(visit func(*entryUint64)) { 2122 for _, e := range h.entries { 2123 if e.Valid() { 2124 visit(&e) 2125 } 2126 } 2127 } 2128 2129 // Uint64MemoTable is a wrapper over the appropriate hashtable to provide an interface 2130 // conforming to the MemoTable interface defined in the encoding package for general interactions 2131 // regarding dictionaries. 2132 type Uint64MemoTable struct { 2133 tbl *Uint64HashTable 2134 nullIdx int32 2135 } 2136 2137 // NewUint64MemoTable returns a new memotable with num entries pre-allocated to reduce further 2138 // allocations when inserting. 2139 func NewUint64MemoTable(num int64) *Uint64MemoTable { 2140 return &Uint64MemoTable{tbl: NewUint64HashTable(uint64(num)), nullIdx: KeyNotFound} 2141 } 2142 2143 func (Uint64MemoTable) TypeTraits() TypeTraits { 2144 return arrow.Uint64Traits 2145 } 2146 2147 // Reset allows this table to be re-used by dumping all the data currently in the table. 2148 func (s *Uint64MemoTable) Reset() { 2149 s.tbl.Reset(32) 2150 s.nullIdx = KeyNotFound 2151 } 2152 2153 // Size returns the current number of inserted elements into the table including if a null 2154 // has been inserted. 2155 func (s *Uint64MemoTable) Size() int { 2156 sz := int(s.tbl.size) 2157 if _, ok := s.GetNull(); ok { 2158 sz++ 2159 } 2160 return sz 2161 } 2162 2163 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 2164 // that will be true if found and false if not. 2165 func (s *Uint64MemoTable) GetNull() (int, bool) { 2166 return int(s.nullIdx), s.nullIdx != KeyNotFound 2167 } 2168 2169 // GetOrInsertNull will return the index of the null entry or insert a null entry 2170 // if one currently doesn't exist. The found value will be true if there was already 2171 // a null in the table, and false if it inserted one. 2172 func (s *Uint64MemoTable) GetOrInsertNull() (idx int, found bool) { 2173 idx, found = s.GetNull() 2174 if !found { 2175 idx = s.Size() 2176 s.nullIdx = int32(idx) 2177 } 2178 return 2179 } 2180 2181 // CopyValues will copy the values from the memo table out into the passed in slice 2182 // which must be of the appropriate type. 2183 func (s *Uint64MemoTable) CopyValues(out interface{}) { 2184 s.CopyValuesSubset(0, out) 2185 } 2186 2187 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 2188 // at the provided start index 2189 func (s *Uint64MemoTable) CopyValuesSubset(start int, out interface{}) { 2190 s.tbl.CopyValuesSubset(start, out.([]uint64)) 2191 } 2192 2193 func (s *Uint64MemoTable) WriteOut(out []byte) { 2194 s.tbl.CopyValues(arrow.Uint64Traits.CastFromBytes(out)) 2195 } 2196 2197 func (s *Uint64MemoTable) WriteOutSubset(start int, out []byte) { 2198 s.tbl.CopyValuesSubset(start, arrow.Uint64Traits.CastFromBytes(out)) 2199 } 2200 2201 func (s *Uint64MemoTable) WriteOutLE(out []byte) { 2202 s.tbl.WriteOut(out) 2203 } 2204 2205 func (s *Uint64MemoTable) WriteOutSubsetLE(start int, out []byte) { 2206 s.tbl.WriteOutSubset(start, out) 2207 } 2208 2209 // Get returns the index of the requested value in the hash table or KeyNotFound 2210 // along with a boolean indicating if it was found or not. 2211 func (s *Uint64MemoTable) Get(val interface{}) (int, bool) { 2212 2213 h := hashInt(uint64(val.(uint64)), 0) 2214 if e, ok := s.tbl.Lookup(h, func(v uint64) bool { return val.(uint64) == v }); ok { 2215 return int(e.payload.memoIdx), ok 2216 } 2217 return KeyNotFound, false 2218 } 2219 2220 // GetOrInsert will return the index of the specified value in the table, or insert the 2221 // value into the table and return the new index. found indicates whether or not it already 2222 // existed in the table (true) or was inserted by this call (false). 2223 func (s *Uint64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 2224 2225 h := hashInt(uint64(val.(uint64)), 0) 2226 e, ok := s.tbl.Lookup(h, func(v uint64) bool { 2227 return val.(uint64) == v 2228 }) 2229 2230 if ok { 2231 idx = int(e.payload.memoIdx) 2232 found = true 2233 } else { 2234 idx = s.Size() 2235 s.tbl.Insert(e, h, val.(uint64), int32(idx)) 2236 } 2237 return 2238 } 2239 2240 // GetOrInsertBytes is unimplemented 2241 func (s *Uint64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { 2242 panic("unimplemented") 2243 } 2244 2245 type payloadFloat32 struct { 2246 val float32 2247 memoIdx int32 2248 } 2249 2250 type entryFloat32 struct { 2251 h uint64 2252 payload payloadFloat32 2253 } 2254 2255 func (e entryFloat32) Valid() bool { return e.h != sentinel } 2256 2257 // Float32HashTable is a hashtable specifically for float32 that 2258 // is utilized with the MemoTable to generalize interactions for easier 2259 // implementation of dictionaries without losing performance. 2260 type Float32HashTable struct { 2261 cap uint64 2262 capMask uint64 2263 size uint64 2264 2265 entries []entryFloat32 2266 } 2267 2268 // NewFloat32HashTable returns a new hash table for float32 values 2269 // initialized with the passed in capacity or 32 whichever is larger. 2270 func NewFloat32HashTable(cap uint64) *Float32HashTable { 2271 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 2272 ret := &Float32HashTable{cap: initCap, capMask: initCap - 1, size: 0} 2273 ret.entries = make([]entryFloat32, initCap) 2274 return ret 2275 } 2276 2277 // Reset drops all of the values in this hash table and re-initializes it 2278 // with the specified initial capacity as if by calling New, but without having 2279 // to reallocate the object. 2280 func (h *Float32HashTable) Reset(cap uint64) { 2281 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 2282 h.capMask = h.cap - 1 2283 h.size = 0 2284 h.entries = make([]entryFloat32, h.cap) 2285 } 2286 2287 // CopyValues is used for copying the values out of the hash table into the 2288 // passed in slice, in the order that they were first inserted 2289 func (h *Float32HashTable) CopyValues(out []float32) { 2290 h.CopyValuesSubset(0, out) 2291 } 2292 2293 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 2294 // with the value at start, in the order that they were inserted. 2295 func (h *Float32HashTable) CopyValuesSubset(start int, out []float32) { 2296 h.VisitEntries(func(e *entryFloat32) { 2297 idx := e.payload.memoIdx - int32(start) 2298 if idx >= 0 { 2299 out[idx] = e.payload.val 2300 } 2301 }) 2302 } 2303 2304 func (h *Float32HashTable) WriteOut(out []byte) { 2305 h.WriteOutSubset(0, out) 2306 } 2307 2308 func (h *Float32HashTable) WriteOutSubset(start int, out []byte) { 2309 data := arrow.Float32Traits.CastFromBytes(out) 2310 h.VisitEntries(func(e *entryFloat32) { 2311 idx := e.payload.memoIdx - int32(start) 2312 if idx >= 0 { 2313 data[idx] = utils.ToLEFloat32(e.payload.val) 2314 } 2315 }) 2316 } 2317 2318 func (h *Float32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 2319 2320 func (Float32HashTable) fixHash(v uint64) uint64 { 2321 if v == sentinel { 2322 return 42 2323 } 2324 return v 2325 } 2326 2327 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 2328 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 2329 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 2330 func (h *Float32HashTable) Lookup(v uint64, cmp func(float32) bool) (*entryFloat32, bool) { 2331 idx, ok := h.lookup(v, h.capMask, cmp) 2332 return &h.entries[idx], ok 2333 } 2334 2335 func (h *Float32HashTable) lookup(v uint64, szMask uint64, cmp func(float32) bool) (uint64, bool) { 2336 const perturbShift uint8 = 5 2337 2338 var ( 2339 idx uint64 2340 perturb uint64 2341 e *entryFloat32 2342 ) 2343 2344 v = h.fixHash(v) 2345 idx = v & szMask 2346 perturb = (v >> uint64(perturbShift)) + 1 2347 2348 for { 2349 e = &h.entries[idx] 2350 if e.h == v && cmp(e.payload.val) { 2351 return idx, true 2352 } 2353 2354 if e.h == sentinel { 2355 return idx, false 2356 } 2357 2358 // perturbation logic inspired from CPython's set/dict object 2359 // the goal is that all 64 bits of unmasked hash value eventually 2360 // participate int he probing sequence, to minimize clustering 2361 idx = (idx + perturb) & szMask 2362 perturb = (perturb >> uint64(perturbShift)) + 1 2363 } 2364 } 2365 2366 func (h *Float32HashTable) upsize(newcap uint64) error { 2367 newMask := newcap - 1 2368 2369 oldEntries := h.entries 2370 h.entries = make([]entryFloat32, newcap) 2371 for _, e := range oldEntries { 2372 if e.Valid() { 2373 idx, _ := h.lookup(e.h, newMask, func(float32) bool { return false }) 2374 h.entries[idx] = e 2375 } 2376 } 2377 h.cap = newcap 2378 h.capMask = newMask 2379 return nil 2380 } 2381 2382 // Insert updates the given entry with the provided hash value, payload value and memo index. 2383 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 2384 func (h *Float32HashTable) Insert(e *entryFloat32, v uint64, val float32, memoIdx int32) error { 2385 e.h = h.fixHash(v) 2386 e.payload.val = val 2387 e.payload.memoIdx = memoIdx 2388 h.size++ 2389 2390 if h.needUpsize() { 2391 h.upsize(h.cap * uint64(loadFactor) * 2) 2392 } 2393 return nil 2394 } 2395 2396 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 2397 // a valid entry being one which has had a value inserted into it. 2398 func (h *Float32HashTable) VisitEntries(visit func(*entryFloat32)) { 2399 for _, e := range h.entries { 2400 if e.Valid() { 2401 visit(&e) 2402 } 2403 } 2404 } 2405 2406 // Float32MemoTable is a wrapper over the appropriate hashtable to provide an interface 2407 // conforming to the MemoTable interface defined in the encoding package for general interactions 2408 // regarding dictionaries. 2409 type Float32MemoTable struct { 2410 tbl *Float32HashTable 2411 nullIdx int32 2412 } 2413 2414 // NewFloat32MemoTable returns a new memotable with num entries pre-allocated to reduce further 2415 // allocations when inserting. 2416 func NewFloat32MemoTable(num int64) *Float32MemoTable { 2417 return &Float32MemoTable{tbl: NewFloat32HashTable(uint64(num)), nullIdx: KeyNotFound} 2418 } 2419 2420 func (Float32MemoTable) TypeTraits() TypeTraits { 2421 return arrow.Float32Traits 2422 } 2423 2424 // Reset allows this table to be re-used by dumping all the data currently in the table. 2425 func (s *Float32MemoTable) Reset() { 2426 s.tbl.Reset(32) 2427 s.nullIdx = KeyNotFound 2428 } 2429 2430 // Size returns the current number of inserted elements into the table including if a null 2431 // has been inserted. 2432 func (s *Float32MemoTable) Size() int { 2433 sz := int(s.tbl.size) 2434 if _, ok := s.GetNull(); ok { 2435 sz++ 2436 } 2437 return sz 2438 } 2439 2440 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 2441 // that will be true if found and false if not. 2442 func (s *Float32MemoTable) GetNull() (int, bool) { 2443 return int(s.nullIdx), s.nullIdx != KeyNotFound 2444 } 2445 2446 // GetOrInsertNull will return the index of the null entry or insert a null entry 2447 // if one currently doesn't exist. The found value will be true if there was already 2448 // a null in the table, and false if it inserted one. 2449 func (s *Float32MemoTable) GetOrInsertNull() (idx int, found bool) { 2450 idx, found = s.GetNull() 2451 if !found { 2452 idx = s.Size() 2453 s.nullIdx = int32(idx) 2454 } 2455 return 2456 } 2457 2458 // CopyValues will copy the values from the memo table out into the passed in slice 2459 // which must be of the appropriate type. 2460 func (s *Float32MemoTable) CopyValues(out interface{}) { 2461 s.CopyValuesSubset(0, out) 2462 } 2463 2464 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 2465 // at the provided start index 2466 func (s *Float32MemoTable) CopyValuesSubset(start int, out interface{}) { 2467 s.tbl.CopyValuesSubset(start, out.([]float32)) 2468 } 2469 2470 func (s *Float32MemoTable) WriteOut(out []byte) { 2471 s.tbl.CopyValues(arrow.Float32Traits.CastFromBytes(out)) 2472 } 2473 2474 func (s *Float32MemoTable) WriteOutSubset(start int, out []byte) { 2475 s.tbl.CopyValuesSubset(start, arrow.Float32Traits.CastFromBytes(out)) 2476 } 2477 2478 func (s *Float32MemoTable) WriteOutLE(out []byte) { 2479 s.tbl.WriteOut(out) 2480 } 2481 2482 func (s *Float32MemoTable) WriteOutSubsetLE(start int, out []byte) { 2483 s.tbl.WriteOutSubset(start, out) 2484 } 2485 2486 // Get returns the index of the requested value in the hash table or KeyNotFound 2487 // along with a boolean indicating if it was found or not. 2488 func (s *Float32MemoTable) Get(val interface{}) (int, bool) { 2489 var cmp func(float32) bool 2490 2491 if math.IsNaN(float64(val.(float32))) { 2492 cmp = isNan32Cmp 2493 // use consistent internal bit pattern for NaN regardless of the pattern 2494 // that is passed to us. NaN is NaN is NaN 2495 val = float32(math.NaN()) 2496 } else { 2497 cmp = func(v float32) bool { return val.(float32) == v } 2498 } 2499 2500 h := hashFloat32(val.(float32), 0) 2501 if e, ok := s.tbl.Lookup(h, cmp); ok { 2502 return int(e.payload.memoIdx), ok 2503 } 2504 return KeyNotFound, false 2505 } 2506 2507 // GetOrInsert will return the index of the specified value in the table, or insert the 2508 // value into the table and return the new index. found indicates whether or not it already 2509 // existed in the table (true) or was inserted by this call (false). 2510 func (s *Float32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 2511 2512 var cmp func(float32) bool 2513 2514 if math.IsNaN(float64(val.(float32))) { 2515 cmp = isNan32Cmp 2516 // use consistent internal bit pattern for NaN regardless of the pattern 2517 // that is passed to us. NaN is NaN is NaN 2518 val = float32(math.NaN()) 2519 } else { 2520 cmp = func(v float32) bool { return val.(float32) == v } 2521 } 2522 2523 h := hashFloat32(val.(float32), 0) 2524 e, ok := s.tbl.Lookup(h, cmp) 2525 2526 if ok { 2527 idx = int(e.payload.memoIdx) 2528 found = true 2529 } else { 2530 idx = s.Size() 2531 s.tbl.Insert(e, h, val.(float32), int32(idx)) 2532 } 2533 return 2534 } 2535 2536 // GetOrInsertBytes is unimplemented 2537 func (s *Float32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { 2538 panic("unimplemented") 2539 } 2540 2541 type payloadFloat64 struct { 2542 val float64 2543 memoIdx int32 2544 } 2545 2546 type entryFloat64 struct { 2547 h uint64 2548 payload payloadFloat64 2549 } 2550 2551 func (e entryFloat64) Valid() bool { return e.h != sentinel } 2552 2553 // Float64HashTable is a hashtable specifically for float64 that 2554 // is utilized with the MemoTable to generalize interactions for easier 2555 // implementation of dictionaries without losing performance. 2556 type Float64HashTable struct { 2557 cap uint64 2558 capMask uint64 2559 size uint64 2560 2561 entries []entryFloat64 2562 } 2563 2564 // NewFloat64HashTable returns a new hash table for float64 values 2565 // initialized with the passed in capacity or 32 whichever is larger. 2566 func NewFloat64HashTable(cap uint64) *Float64HashTable { 2567 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 2568 ret := &Float64HashTable{cap: initCap, capMask: initCap - 1, size: 0} 2569 ret.entries = make([]entryFloat64, initCap) 2570 return ret 2571 } 2572 2573 // Reset drops all of the values in this hash table and re-initializes it 2574 // with the specified initial capacity as if by calling New, but without having 2575 // to reallocate the object. 2576 func (h *Float64HashTable) Reset(cap uint64) { 2577 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) 2578 h.capMask = h.cap - 1 2579 h.size = 0 2580 h.entries = make([]entryFloat64, h.cap) 2581 } 2582 2583 // CopyValues is used for copying the values out of the hash table into the 2584 // passed in slice, in the order that they were first inserted 2585 func (h *Float64HashTable) CopyValues(out []float64) { 2586 h.CopyValuesSubset(0, out) 2587 } 2588 2589 // CopyValuesSubset copies a subset of the values in the hashtable out, starting 2590 // with the value at start, in the order that they were inserted. 2591 func (h *Float64HashTable) CopyValuesSubset(start int, out []float64) { 2592 h.VisitEntries(func(e *entryFloat64) { 2593 idx := e.payload.memoIdx - int32(start) 2594 if idx >= 0 { 2595 out[idx] = e.payload.val 2596 } 2597 }) 2598 } 2599 2600 func (h *Float64HashTable) WriteOut(out []byte) { 2601 h.WriteOutSubset(0, out) 2602 } 2603 2604 func (h *Float64HashTable) WriteOutSubset(start int, out []byte) { 2605 data := arrow.Float64Traits.CastFromBytes(out) 2606 h.VisitEntries(func(e *entryFloat64) { 2607 idx := e.payload.memoIdx - int32(start) 2608 if idx >= 0 { 2609 data[idx] = utils.ToLEFloat64(e.payload.val) 2610 } 2611 }) 2612 } 2613 2614 func (h *Float64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } 2615 2616 func (Float64HashTable) fixHash(v uint64) uint64 { 2617 if v == sentinel { 2618 return 42 2619 } 2620 return v 2621 } 2622 2623 // Lookup retrieves the entry for a given hash value assuming it's payload value returns 2624 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value, 2625 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. 2626 func (h *Float64HashTable) Lookup(v uint64, cmp func(float64) bool) (*entryFloat64, bool) { 2627 idx, ok := h.lookup(v, h.capMask, cmp) 2628 return &h.entries[idx], ok 2629 } 2630 2631 func (h *Float64HashTable) lookup(v uint64, szMask uint64, cmp func(float64) bool) (uint64, bool) { 2632 const perturbShift uint8 = 5 2633 2634 var ( 2635 idx uint64 2636 perturb uint64 2637 e *entryFloat64 2638 ) 2639 2640 v = h.fixHash(v) 2641 idx = v & szMask 2642 perturb = (v >> uint64(perturbShift)) + 1 2643 2644 for { 2645 e = &h.entries[idx] 2646 if e.h == v && cmp(e.payload.val) { 2647 return idx, true 2648 } 2649 2650 if e.h == sentinel { 2651 return idx, false 2652 } 2653 2654 // perturbation logic inspired from CPython's set/dict object 2655 // the goal is that all 64 bits of unmasked hash value eventually 2656 // participate int he probing sequence, to minimize clustering 2657 idx = (idx + perturb) & szMask 2658 perturb = (perturb >> uint64(perturbShift)) + 1 2659 } 2660 } 2661 2662 func (h *Float64HashTable) upsize(newcap uint64) error { 2663 newMask := newcap - 1 2664 2665 oldEntries := h.entries 2666 h.entries = make([]entryFloat64, newcap) 2667 for _, e := range oldEntries { 2668 if e.Valid() { 2669 idx, _ := h.lookup(e.h, newMask, func(float64) bool { return false }) 2670 h.entries[idx] = e 2671 } 2672 } 2673 h.cap = newcap 2674 h.capMask = newMask 2675 return nil 2676 } 2677 2678 // Insert updates the given entry with the provided hash value, payload value and memo index. 2679 // The entry pointer must have been retrieved via lookup in order to actually insert properly. 2680 func (h *Float64HashTable) Insert(e *entryFloat64, v uint64, val float64, memoIdx int32) error { 2681 e.h = h.fixHash(v) 2682 e.payload.val = val 2683 e.payload.memoIdx = memoIdx 2684 h.size++ 2685 2686 if h.needUpsize() { 2687 h.upsize(h.cap * uint64(loadFactor) * 2) 2688 } 2689 return nil 2690 } 2691 2692 // VisitEntries will call the passed in function on each *valid* entry in the hash table, 2693 // a valid entry being one which has had a value inserted into it. 2694 func (h *Float64HashTable) VisitEntries(visit func(*entryFloat64)) { 2695 for _, e := range h.entries { 2696 if e.Valid() { 2697 visit(&e) 2698 } 2699 } 2700 } 2701 2702 // Float64MemoTable is a wrapper over the appropriate hashtable to provide an interface 2703 // conforming to the MemoTable interface defined in the encoding package for general interactions 2704 // regarding dictionaries. 2705 type Float64MemoTable struct { 2706 tbl *Float64HashTable 2707 nullIdx int32 2708 } 2709 2710 // NewFloat64MemoTable returns a new memotable with num entries pre-allocated to reduce further 2711 // allocations when inserting. 2712 func NewFloat64MemoTable(num int64) *Float64MemoTable { 2713 return &Float64MemoTable{tbl: NewFloat64HashTable(uint64(num)), nullIdx: KeyNotFound} 2714 } 2715 2716 func (Float64MemoTable) TypeTraits() TypeTraits { 2717 return arrow.Float64Traits 2718 } 2719 2720 // Reset allows this table to be re-used by dumping all the data currently in the table. 2721 func (s *Float64MemoTable) Reset() { 2722 s.tbl.Reset(32) 2723 s.nullIdx = KeyNotFound 2724 } 2725 2726 // Size returns the current number of inserted elements into the table including if a null 2727 // has been inserted. 2728 func (s *Float64MemoTable) Size() int { 2729 sz := int(s.tbl.size) 2730 if _, ok := s.GetNull(); ok { 2731 sz++ 2732 } 2733 return sz 2734 } 2735 2736 // GetNull returns the index of an inserted null or KeyNotFound along with a bool 2737 // that will be true if found and false if not. 2738 func (s *Float64MemoTable) GetNull() (int, bool) { 2739 return int(s.nullIdx), s.nullIdx != KeyNotFound 2740 } 2741 2742 // GetOrInsertNull will return the index of the null entry or insert a null entry 2743 // if one currently doesn't exist. The found value will be true if there was already 2744 // a null in the table, and false if it inserted one. 2745 func (s *Float64MemoTable) GetOrInsertNull() (idx int, found bool) { 2746 idx, found = s.GetNull() 2747 if !found { 2748 idx = s.Size() 2749 s.nullIdx = int32(idx) 2750 } 2751 return 2752 } 2753 2754 // CopyValues will copy the values from the memo table out into the passed in slice 2755 // which must be of the appropriate type. 2756 func (s *Float64MemoTable) CopyValues(out interface{}) { 2757 s.CopyValuesSubset(0, out) 2758 } 2759 2760 // CopyValuesSubset is like CopyValues but only copies a subset of values starting 2761 // at the provided start index 2762 func (s *Float64MemoTable) CopyValuesSubset(start int, out interface{}) { 2763 s.tbl.CopyValuesSubset(start, out.([]float64)) 2764 } 2765 2766 func (s *Float64MemoTable) WriteOut(out []byte) { 2767 s.tbl.CopyValues(arrow.Float64Traits.CastFromBytes(out)) 2768 } 2769 2770 func (s *Float64MemoTable) WriteOutSubset(start int, out []byte) { 2771 s.tbl.CopyValuesSubset(start, arrow.Float64Traits.CastFromBytes(out)) 2772 } 2773 2774 func (s *Float64MemoTable) WriteOutLE(out []byte) { 2775 s.tbl.WriteOut(out) 2776 } 2777 2778 func (s *Float64MemoTable) WriteOutSubsetLE(start int, out []byte) { 2779 s.tbl.WriteOutSubset(start, out) 2780 } 2781 2782 // Get returns the index of the requested value in the hash table or KeyNotFound 2783 // along with a boolean indicating if it was found or not. 2784 func (s *Float64MemoTable) Get(val interface{}) (int, bool) { 2785 var cmp func(float64) bool 2786 if math.IsNaN(val.(float64)) { 2787 cmp = math.IsNaN 2788 // use consistent internal bit pattern for NaN regardless of the pattern 2789 // that is passed to us. NaN is NaN is NaN 2790 val = math.NaN() 2791 } else { 2792 cmp = func(v float64) bool { return val.(float64) == v } 2793 } 2794 2795 h := hashFloat64(val.(float64), 0) 2796 if e, ok := s.tbl.Lookup(h, cmp); ok { 2797 return int(e.payload.memoIdx), ok 2798 } 2799 return KeyNotFound, false 2800 } 2801 2802 // GetOrInsert will return the index of the specified value in the table, or insert the 2803 // value into the table and return the new index. found indicates whether or not it already 2804 // existed in the table (true) or was inserted by this call (false). 2805 func (s *Float64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { 2806 2807 var cmp func(float64) bool 2808 if math.IsNaN(val.(float64)) { 2809 cmp = math.IsNaN 2810 // use consistent internal bit pattern for NaN regardless of the pattern 2811 // that is passed to us. NaN is NaN is NaN 2812 val = math.NaN() 2813 } else { 2814 cmp = func(v float64) bool { return val.(float64) == v } 2815 } 2816 2817 h := hashFloat64(val.(float64), 0) 2818 e, ok := s.tbl.Lookup(h, cmp) 2819 2820 if ok { 2821 idx = int(e.payload.memoIdx) 2822 found = true 2823 } else { 2824 idx = s.Size() 2825 s.tbl.Insert(e, h, val.(float64), int32(idx)) 2826 } 2827 return 2828 } 2829 2830 // GetOrInsertBytes is unimplemented 2831 func (s *Float64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { 2832 panic("unimplemented") 2833 }