github.com/apache/arrow/go/v14@v14.0.1/internal/hashing/xxh3_memo_table.gen.go (about)

     1  // Code generated by xxh3_memo_table.gen.go.tmpl. DO NOT EDIT.
     2  
     3  // Licensed to the Apache Software Foundation (ASF) under one
     4  // or more contributor license agreements.  See the NOTICE file
     5  // distributed with this work for additional information
     6  // regarding copyright ownership.  The ASF licenses this file
     7  // to you under the Apache License, Version 2.0 (the
     8  // "License"); you may not use this file except in compliance
     9  // with the License.  You may obtain a copy of the License at
    10  //
    11  // http://www.apache.org/licenses/LICENSE-2.0
    12  //
    13  // Unless required by applicable law or agreed to in writing, software
    14  // distributed under the License is distributed on an "AS IS" BASIS,
    15  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  // See the License for the specific language governing permissions and
    17  // limitations under the License.
    18  
    19  package hashing
    20  
    21  import (
    22  	"math"
    23  
    24  	"github.com/apache/arrow/go/v14/arrow"
    25  	"github.com/apache/arrow/go/v14/arrow/bitutil"
    26  	"github.com/apache/arrow/go/v14/internal/utils"
    27  )
    28  
// payloadInt8 is the data stored in a hash table slot: the int8 value
// itself and the memo index that was supplied when it was inserted.
type payloadInt8 struct {
	val     int8  // the stored value
	memoIdx int32 // index handed to Insert for this value
}
    33  
// entryInt8 is a single slot of an Int8HashTable. A slot whose h equals
// sentinel is treated as empty (see Valid).
type entryInt8 struct {
	h       uint64      // hash stored for this slot; sentinel when the slot is empty
	payload payloadInt8 // value and memo index held by the slot
}
    38  
    39  func (e entryInt8) Valid() bool { return e.h != sentinel }
    40  
// Int8HashTable is a hashtable specifically for int8 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
//
// Slots live in entries; a slot is located by masking a hash with capMask
// and probing from there (see lookup).
type Int8HashTable struct {
	cap     uint64 // number of slots; kept equal to len(entries)
	capMask uint64 // cap - 1, ANDed with hashes and probe positions
	size    uint64 // count of inserted entries, incremented by Insert

	entries []entryInt8 // slot storage; a slot whose h equals sentinel is empty
}
    51  
    52  // NewInt8HashTable returns a new hash table for int8 values
    53  // initialized with the passed in capacity or 32 whichever is larger.
    54  func NewInt8HashTable(cap uint64) *Int8HashTable {
    55  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
    56  	ret := &Int8HashTable{cap: initCap, capMask: initCap - 1, size: 0}
    57  	ret.entries = make([]entryInt8, initCap)
    58  	return ret
    59  }
    60  
    61  // Reset drops all of the values in this hash table and re-initializes it
    62  // with the specified initial capacity as if by calling New, but without having
    63  // to reallocate the object.
    64  func (h *Int8HashTable) Reset(cap uint64) {
    65  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
    66  	h.capMask = h.cap - 1
    67  	h.size = 0
    68  	h.entries = make([]entryInt8, h.cap)
    69  }
    70  
    71  // CopyValues is used for copying the values out of the hash table into the
    72  // passed in slice, in the order that they were first inserted
    73  func (h *Int8HashTable) CopyValues(out []int8) {
    74  	h.CopyValuesSubset(0, out)
    75  }
    76  
    77  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
    78  // with the value at start, in the order that they were inserted.
    79  func (h *Int8HashTable) CopyValuesSubset(start int, out []int8) {
    80  	h.VisitEntries(func(e *entryInt8) {
    81  		idx := e.payload.memoIdx - int32(start)
    82  		if idx >= 0 {
    83  			out[idx] = e.payload.val
    84  		}
    85  	})
    86  }
    87  
    88  func (h *Int8HashTable) WriteOut(out []byte) {
    89  	h.WriteOutSubset(0, out)
    90  }
    91  
    92  func (h *Int8HashTable) WriteOutSubset(start int, out []byte) {
    93  	data := arrow.Int8Traits.CastFromBytes(out)
    94  	h.VisitEntries(func(e *entryInt8) {
    95  		idx := e.payload.memoIdx - int32(start)
    96  		if idx >= 0 {
    97  			data[idx] = e.payload.val
    98  		}
    99  	})
   100  }
   101  
   102  func (h *Int8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
   103  
   104  func (Int8HashTable) fixHash(v uint64) uint64 {
   105  	if v == sentinel {
   106  		return 42
   107  	}
   108  	return v
   109  }
   110  
   111  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
   112  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
   113  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
   114  func (h *Int8HashTable) Lookup(v uint64, cmp func(int8) bool) (*entryInt8, bool) {
   115  	idx, ok := h.lookup(v, h.capMask, cmp)
   116  	return &h.entries[idx], ok
   117  }
   118  
   119  func (h *Int8HashTable) lookup(v uint64, szMask uint64, cmp func(int8) bool) (uint64, bool) {
   120  	const perturbShift uint8 = 5
   121  
   122  	var (
   123  		idx     uint64
   124  		perturb uint64
   125  		e       *entryInt8
   126  	)
   127  
   128  	v = h.fixHash(v)
   129  	idx = v & szMask
   130  	perturb = (v >> uint64(perturbShift)) + 1
   131  
   132  	for {
   133  		e = &h.entries[idx]
   134  		if e.h == v && cmp(e.payload.val) {
   135  			return idx, true
   136  		}
   137  
   138  		if e.h == sentinel {
   139  			return idx, false
   140  		}
   141  
   142  		// perturbation logic inspired from CPython's set/dict object
   143  		// the goal is that all 64 bits of unmasked hash value eventually
   144  		// participate int he probing sequence, to minimize clustering
   145  		idx = (idx + perturb) & szMask
   146  		perturb = (perturb >> uint64(perturbShift)) + 1
   147  	}
   148  }
   149  
   150  func (h *Int8HashTable) upsize(newcap uint64) error {
   151  	newMask := newcap - 1
   152  
   153  	oldEntries := h.entries
   154  	h.entries = make([]entryInt8, newcap)
   155  	for _, e := range oldEntries {
   156  		if e.Valid() {
   157  			idx, _ := h.lookup(e.h, newMask, func(int8) bool { return false })
   158  			h.entries[idx] = e
   159  		}
   160  	}
   161  	h.cap = newcap
   162  	h.capMask = newMask
   163  	return nil
   164  }
   165  
   166  // Insert updates the given entry with the provided hash value, payload value and memo index.
   167  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
   168  func (h *Int8HashTable) Insert(e *entryInt8, v uint64, val int8, memoIdx int32) error {
   169  	e.h = h.fixHash(v)
   170  	e.payload.val = val
   171  	e.payload.memoIdx = memoIdx
   172  	h.size++
   173  
   174  	if h.needUpsize() {
   175  		h.upsize(h.cap * uint64(loadFactor) * 2)
   176  	}
   177  	return nil
   178  }
   179  
   180  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
   181  // a valid entry being one which has had a value inserted into it.
   182  func (h *Int8HashTable) VisitEntries(visit func(*entryInt8)) {
   183  	for _, e := range h.entries {
   184  		if e.Valid() {
   185  			visit(&e)
   186  		}
   187  	}
   188  }
   189  
// Int8MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Int8MemoTable struct {
	tbl     *Int8HashTable // backing hash table holding the non-null values
	nullIdx int32          // memo index of the null entry, or KeyNotFound if none was inserted
}
   197  
   198  // NewInt8MemoTable returns a new memotable with num entries pre-allocated to reduce further
   199  // allocations when inserting.
   200  func NewInt8MemoTable(num int64) *Int8MemoTable {
   201  	return &Int8MemoTable{tbl: NewInt8HashTable(uint64(num)), nullIdx: KeyNotFound}
   202  }
   203  
   204  func (Int8MemoTable) TypeTraits() TypeTraits {
   205  	return arrow.Int8Traits
   206  }
   207  
   208  // Reset allows this table to be re-used by dumping all the data currently in the table.
   209  func (s *Int8MemoTable) Reset() {
   210  	s.tbl.Reset(32)
   211  	s.nullIdx = KeyNotFound
   212  }
   213  
   214  // Size returns the current number of inserted elements into the table including if a null
   215  // has been inserted.
   216  func (s *Int8MemoTable) Size() int {
   217  	sz := int(s.tbl.size)
   218  	if _, ok := s.GetNull(); ok {
   219  		sz++
   220  	}
   221  	return sz
   222  }
   223  
   224  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
   225  // that will be true if found and false if not.
   226  func (s *Int8MemoTable) GetNull() (int, bool) {
   227  	return int(s.nullIdx), s.nullIdx != KeyNotFound
   228  }
   229  
   230  // GetOrInsertNull will return the index of the null entry or insert a null entry
   231  // if one currently doesn't exist. The found value will be true if there was already
   232  // a null in the table, and false if it inserted one.
   233  func (s *Int8MemoTable) GetOrInsertNull() (idx int, found bool) {
   234  	idx, found = s.GetNull()
   235  	if !found {
   236  		idx = s.Size()
   237  		s.nullIdx = int32(idx)
   238  	}
   239  	return
   240  }
   241  
   242  // CopyValues will copy the values from the memo table out into the passed in slice
   243  // which must be of the appropriate type.
   244  func (s *Int8MemoTable) CopyValues(out interface{}) {
   245  	s.CopyValuesSubset(0, out)
   246  }
   247  
   248  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
   249  // at the provided start index
   250  func (s *Int8MemoTable) CopyValuesSubset(start int, out interface{}) {
   251  	s.tbl.CopyValuesSubset(start, out.([]int8))
   252  }
   253  
   254  func (s *Int8MemoTable) WriteOut(out []byte) {
   255  	s.tbl.CopyValues(arrow.Int8Traits.CastFromBytes(out))
   256  }
   257  
   258  func (s *Int8MemoTable) WriteOutSubset(start int, out []byte) {
   259  	s.tbl.CopyValuesSubset(start, arrow.Int8Traits.CastFromBytes(out))
   260  }
   261  
   262  func (s *Int8MemoTable) WriteOutLE(out []byte) {
   263  	s.tbl.WriteOut(out)
   264  }
   265  
   266  func (s *Int8MemoTable) WriteOutSubsetLE(start int, out []byte) {
   267  	s.tbl.WriteOutSubset(start, out)
   268  }
   269  
   270  // Get returns the index of the requested value in the hash table or KeyNotFound
   271  // along with a boolean indicating if it was found or not.
   272  func (s *Int8MemoTable) Get(val interface{}) (int, bool) {
   273  
   274  	h := hashInt(uint64(val.(int8)), 0)
   275  	if e, ok := s.tbl.Lookup(h, func(v int8) bool { return val.(int8) == v }); ok {
   276  		return int(e.payload.memoIdx), ok
   277  	}
   278  	return KeyNotFound, false
   279  }
   280  
   281  // GetOrInsert will return the index of the specified value in the table, or insert the
   282  // value into the table and return the new index. found indicates whether or not it already
   283  // existed in the table (true) or was inserted by this call (false).
   284  func (s *Int8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
   285  
   286  	h := hashInt(uint64(val.(int8)), 0)
   287  	e, ok := s.tbl.Lookup(h, func(v int8) bool {
   288  		return val.(int8) == v
   289  	})
   290  
   291  	if ok {
   292  		idx = int(e.payload.memoIdx)
   293  		found = true
   294  	} else {
   295  		idx = s.Size()
   296  		s.tbl.Insert(e, h, val.(int8), int32(idx))
   297  	}
   298  	return
   299  }
   300  
// GetOrInsertBytes is unimplemented for this table: calling it always panics
// with "unimplemented" and never returns.
func (s *Int8MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
   305  
// payloadUint8 is the data stored in a hash table slot: the uint8 value
// itself and the memo index that was supplied when it was inserted.
type payloadUint8 struct {
	val     uint8 // the stored value
	memoIdx int32 // index handed to Insert for this value
}
   310  
// entryUint8 is a single slot of a Uint8HashTable. A slot whose h equals
// sentinel is treated as empty (see Valid).
type entryUint8 struct {
	h       uint64       // hash stored for this slot; sentinel when the slot is empty
	payload payloadUint8 // value and memo index held by the slot
}
   315  
   316  func (e entryUint8) Valid() bool { return e.h != sentinel }
   317  
// Uint8HashTable is a hashtable specifically for uint8 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
//
// Slots live in entries; a slot is located by masking a hash with capMask
// and probing from there (see lookup).
type Uint8HashTable struct {
	cap     uint64 // number of slots; kept equal to len(entries)
	capMask uint64 // cap - 1, ANDed with hashes and probe positions
	size    uint64 // count of inserted entries, incremented by Insert

	entries []entryUint8 // slot storage; a slot whose h equals sentinel is empty
}
   328  
   329  // NewUint8HashTable returns a new hash table for uint8 values
   330  // initialized with the passed in capacity or 32 whichever is larger.
   331  func NewUint8HashTable(cap uint64) *Uint8HashTable {
   332  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   333  	ret := &Uint8HashTable{cap: initCap, capMask: initCap - 1, size: 0}
   334  	ret.entries = make([]entryUint8, initCap)
   335  	return ret
   336  }
   337  
   338  // Reset drops all of the values in this hash table and re-initializes it
   339  // with the specified initial capacity as if by calling New, but without having
   340  // to reallocate the object.
   341  func (h *Uint8HashTable) Reset(cap uint64) {
   342  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   343  	h.capMask = h.cap - 1
   344  	h.size = 0
   345  	h.entries = make([]entryUint8, h.cap)
   346  }
   347  
   348  // CopyValues is used for copying the values out of the hash table into the
   349  // passed in slice, in the order that they were first inserted
   350  func (h *Uint8HashTable) CopyValues(out []uint8) {
   351  	h.CopyValuesSubset(0, out)
   352  }
   353  
   354  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
   355  // with the value at start, in the order that they were inserted.
   356  func (h *Uint8HashTable) CopyValuesSubset(start int, out []uint8) {
   357  	h.VisitEntries(func(e *entryUint8) {
   358  		idx := e.payload.memoIdx - int32(start)
   359  		if idx >= 0 {
   360  			out[idx] = e.payload.val
   361  		}
   362  	})
   363  }
   364  
   365  func (h *Uint8HashTable) WriteOut(out []byte) {
   366  	h.WriteOutSubset(0, out)
   367  }
   368  
   369  func (h *Uint8HashTable) WriteOutSubset(start int, out []byte) {
   370  	data := arrow.Uint8Traits.CastFromBytes(out)
   371  	h.VisitEntries(func(e *entryUint8) {
   372  		idx := e.payload.memoIdx - int32(start)
   373  		if idx >= 0 {
   374  			data[idx] = e.payload.val
   375  		}
   376  	})
   377  }
   378  
   379  func (h *Uint8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
   380  
   381  func (Uint8HashTable) fixHash(v uint64) uint64 {
   382  	if v == sentinel {
   383  		return 42
   384  	}
   385  	return v
   386  }
   387  
   388  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
   389  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
   390  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
   391  func (h *Uint8HashTable) Lookup(v uint64, cmp func(uint8) bool) (*entryUint8, bool) {
   392  	idx, ok := h.lookup(v, h.capMask, cmp)
   393  	return &h.entries[idx], ok
   394  }
   395  
   396  func (h *Uint8HashTable) lookup(v uint64, szMask uint64, cmp func(uint8) bool) (uint64, bool) {
   397  	const perturbShift uint8 = 5
   398  
   399  	var (
   400  		idx     uint64
   401  		perturb uint64
   402  		e       *entryUint8
   403  	)
   404  
   405  	v = h.fixHash(v)
   406  	idx = v & szMask
   407  	perturb = (v >> uint64(perturbShift)) + 1
   408  
   409  	for {
   410  		e = &h.entries[idx]
   411  		if e.h == v && cmp(e.payload.val) {
   412  			return idx, true
   413  		}
   414  
   415  		if e.h == sentinel {
   416  			return idx, false
   417  		}
   418  
   419  		// perturbation logic inspired from CPython's set/dict object
   420  		// the goal is that all 64 bits of unmasked hash value eventually
   421  		// participate int he probing sequence, to minimize clustering
   422  		idx = (idx + perturb) & szMask
   423  		perturb = (perturb >> uint64(perturbShift)) + 1
   424  	}
   425  }
   426  
   427  func (h *Uint8HashTable) upsize(newcap uint64) error {
   428  	newMask := newcap - 1
   429  
   430  	oldEntries := h.entries
   431  	h.entries = make([]entryUint8, newcap)
   432  	for _, e := range oldEntries {
   433  		if e.Valid() {
   434  			idx, _ := h.lookup(e.h, newMask, func(uint8) bool { return false })
   435  			h.entries[idx] = e
   436  		}
   437  	}
   438  	h.cap = newcap
   439  	h.capMask = newMask
   440  	return nil
   441  }
   442  
   443  // Insert updates the given entry with the provided hash value, payload value and memo index.
   444  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
   445  func (h *Uint8HashTable) Insert(e *entryUint8, v uint64, val uint8, memoIdx int32) error {
   446  	e.h = h.fixHash(v)
   447  	e.payload.val = val
   448  	e.payload.memoIdx = memoIdx
   449  	h.size++
   450  
   451  	if h.needUpsize() {
   452  		h.upsize(h.cap * uint64(loadFactor) * 2)
   453  	}
   454  	return nil
   455  }
   456  
   457  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
   458  // a valid entry being one which has had a value inserted into it.
   459  func (h *Uint8HashTable) VisitEntries(visit func(*entryUint8)) {
   460  	for _, e := range h.entries {
   461  		if e.Valid() {
   462  			visit(&e)
   463  		}
   464  	}
   465  }
   466  
// Uint8MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Uint8MemoTable struct {
	tbl     *Uint8HashTable // backing hash table holding the non-null values
	nullIdx int32           // memo index of the null entry, or KeyNotFound if none was inserted
}
   474  
   475  // NewUint8MemoTable returns a new memotable with num entries pre-allocated to reduce further
   476  // allocations when inserting.
   477  func NewUint8MemoTable(num int64) *Uint8MemoTable {
   478  	return &Uint8MemoTable{tbl: NewUint8HashTable(uint64(num)), nullIdx: KeyNotFound}
   479  }
   480  
   481  func (Uint8MemoTable) TypeTraits() TypeTraits {
   482  	return arrow.Uint8Traits
   483  }
   484  
   485  // Reset allows this table to be re-used by dumping all the data currently in the table.
   486  func (s *Uint8MemoTable) Reset() {
   487  	s.tbl.Reset(32)
   488  	s.nullIdx = KeyNotFound
   489  }
   490  
   491  // Size returns the current number of inserted elements into the table including if a null
   492  // has been inserted.
   493  func (s *Uint8MemoTable) Size() int {
   494  	sz := int(s.tbl.size)
   495  	if _, ok := s.GetNull(); ok {
   496  		sz++
   497  	}
   498  	return sz
   499  }
   500  
   501  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
   502  // that will be true if found and false if not.
   503  func (s *Uint8MemoTable) GetNull() (int, bool) {
   504  	return int(s.nullIdx), s.nullIdx != KeyNotFound
   505  }
   506  
   507  // GetOrInsertNull will return the index of the null entry or insert a null entry
   508  // if one currently doesn't exist. The found value will be true if there was already
   509  // a null in the table, and false if it inserted one.
   510  func (s *Uint8MemoTable) GetOrInsertNull() (idx int, found bool) {
   511  	idx, found = s.GetNull()
   512  	if !found {
   513  		idx = s.Size()
   514  		s.nullIdx = int32(idx)
   515  	}
   516  	return
   517  }
   518  
   519  // CopyValues will copy the values from the memo table out into the passed in slice
   520  // which must be of the appropriate type.
   521  func (s *Uint8MemoTable) CopyValues(out interface{}) {
   522  	s.CopyValuesSubset(0, out)
   523  }
   524  
   525  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
   526  // at the provided start index
   527  func (s *Uint8MemoTable) CopyValuesSubset(start int, out interface{}) {
   528  	s.tbl.CopyValuesSubset(start, out.([]uint8))
   529  }
   530  
   531  func (s *Uint8MemoTable) WriteOut(out []byte) {
   532  	s.tbl.CopyValues(arrow.Uint8Traits.CastFromBytes(out))
   533  }
   534  
   535  func (s *Uint8MemoTable) WriteOutSubset(start int, out []byte) {
   536  	s.tbl.CopyValuesSubset(start, arrow.Uint8Traits.CastFromBytes(out))
   537  }
   538  
   539  func (s *Uint8MemoTable) WriteOutLE(out []byte) {
   540  	s.tbl.WriteOut(out)
   541  }
   542  
   543  func (s *Uint8MemoTable) WriteOutSubsetLE(start int, out []byte) {
   544  	s.tbl.WriteOutSubset(start, out)
   545  }
   546  
   547  // Get returns the index of the requested value in the hash table or KeyNotFound
   548  // along with a boolean indicating if it was found or not.
   549  func (s *Uint8MemoTable) Get(val interface{}) (int, bool) {
   550  
   551  	h := hashInt(uint64(val.(uint8)), 0)
   552  	if e, ok := s.tbl.Lookup(h, func(v uint8) bool { return val.(uint8) == v }); ok {
   553  		return int(e.payload.memoIdx), ok
   554  	}
   555  	return KeyNotFound, false
   556  }
   557  
   558  // GetOrInsert will return the index of the specified value in the table, or insert the
   559  // value into the table and return the new index. found indicates whether or not it already
   560  // existed in the table (true) or was inserted by this call (false).
   561  func (s *Uint8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
   562  
   563  	h := hashInt(uint64(val.(uint8)), 0)
   564  	e, ok := s.tbl.Lookup(h, func(v uint8) bool {
   565  		return val.(uint8) == v
   566  	})
   567  
   568  	if ok {
   569  		idx = int(e.payload.memoIdx)
   570  		found = true
   571  	} else {
   572  		idx = s.Size()
   573  		s.tbl.Insert(e, h, val.(uint8), int32(idx))
   574  	}
   575  	return
   576  }
   577  
// GetOrInsertBytes is unimplemented for this table: calling it always panics
// with "unimplemented" and never returns.
func (s *Uint8MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
   582  
// payloadInt16 is the data stored in a hash table slot: the int16 value
// itself and the memo index that was supplied when it was inserted.
type payloadInt16 struct {
	val     int16 // the stored value
	memoIdx int32 // index handed to Insert for this value
}
   587  
// entryInt16 is a single slot of an Int16HashTable. A slot whose h equals
// sentinel is treated as empty (see Valid).
type entryInt16 struct {
	h       uint64       // hash stored for this slot; sentinel when the slot is empty
	payload payloadInt16 // value and memo index held by the slot
}
   592  
   593  func (e entryInt16) Valid() bool { return e.h != sentinel }
   594  
// Int16HashTable is a hashtable specifically for int16 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
//
// Slots live in entries; a slot is located by masking a hash with capMask
// and probing from there (see lookup).
type Int16HashTable struct {
	cap     uint64 // number of slots; kept equal to len(entries)
	capMask uint64 // cap - 1, ANDed with hashes and probe positions
	size    uint64 // count of inserted entries, incremented by Insert

	entries []entryInt16 // slot storage; a slot whose h equals sentinel is empty
}
   605  
   606  // NewInt16HashTable returns a new hash table for int16 values
   607  // initialized with the passed in capacity or 32 whichever is larger.
   608  func NewInt16HashTable(cap uint64) *Int16HashTable {
   609  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   610  	ret := &Int16HashTable{cap: initCap, capMask: initCap - 1, size: 0}
   611  	ret.entries = make([]entryInt16, initCap)
   612  	return ret
   613  }
   614  
   615  // Reset drops all of the values in this hash table and re-initializes it
   616  // with the specified initial capacity as if by calling New, but without having
   617  // to reallocate the object.
   618  func (h *Int16HashTable) Reset(cap uint64) {
   619  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   620  	h.capMask = h.cap - 1
   621  	h.size = 0
   622  	h.entries = make([]entryInt16, h.cap)
   623  }
   624  
   625  // CopyValues is used for copying the values out of the hash table into the
   626  // passed in slice, in the order that they were first inserted
   627  func (h *Int16HashTable) CopyValues(out []int16) {
   628  	h.CopyValuesSubset(0, out)
   629  }
   630  
   631  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
   632  // with the value at start, in the order that they were inserted.
   633  func (h *Int16HashTable) CopyValuesSubset(start int, out []int16) {
   634  	h.VisitEntries(func(e *entryInt16) {
   635  		idx := e.payload.memoIdx - int32(start)
   636  		if idx >= 0 {
   637  			out[idx] = e.payload.val
   638  		}
   639  	})
   640  }
   641  
   642  func (h *Int16HashTable) WriteOut(out []byte) {
   643  	h.WriteOutSubset(0, out)
   644  }
   645  
   646  func (h *Int16HashTable) WriteOutSubset(start int, out []byte) {
   647  	data := arrow.Int16Traits.CastFromBytes(out)
   648  	h.VisitEntries(func(e *entryInt16) {
   649  		idx := e.payload.memoIdx - int32(start)
   650  		if idx >= 0 {
   651  			data[idx] = utils.ToLEInt16(e.payload.val)
   652  		}
   653  	})
   654  }
   655  
   656  func (h *Int16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
   657  
   658  func (Int16HashTable) fixHash(v uint64) uint64 {
   659  	if v == sentinel {
   660  		return 42
   661  	}
   662  	return v
   663  }
   664  
   665  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
   666  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
   667  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
   668  func (h *Int16HashTable) Lookup(v uint64, cmp func(int16) bool) (*entryInt16, bool) {
   669  	idx, ok := h.lookup(v, h.capMask, cmp)
   670  	return &h.entries[idx], ok
   671  }
   672  
   673  func (h *Int16HashTable) lookup(v uint64, szMask uint64, cmp func(int16) bool) (uint64, bool) {
   674  	const perturbShift uint8 = 5
   675  
   676  	var (
   677  		idx     uint64
   678  		perturb uint64
   679  		e       *entryInt16
   680  	)
   681  
   682  	v = h.fixHash(v)
   683  	idx = v & szMask
   684  	perturb = (v >> uint64(perturbShift)) + 1
   685  
   686  	for {
   687  		e = &h.entries[idx]
   688  		if e.h == v && cmp(e.payload.val) {
   689  			return idx, true
   690  		}
   691  
   692  		if e.h == sentinel {
   693  			return idx, false
   694  		}
   695  
   696  		// perturbation logic inspired from CPython's set/dict object
   697  		// the goal is that all 64 bits of unmasked hash value eventually
   698  		// participate int he probing sequence, to minimize clustering
   699  		idx = (idx + perturb) & szMask
   700  		perturb = (perturb >> uint64(perturbShift)) + 1
   701  	}
   702  }
   703  
   704  func (h *Int16HashTable) upsize(newcap uint64) error {
   705  	newMask := newcap - 1
   706  
   707  	oldEntries := h.entries
   708  	h.entries = make([]entryInt16, newcap)
   709  	for _, e := range oldEntries {
   710  		if e.Valid() {
   711  			idx, _ := h.lookup(e.h, newMask, func(int16) bool { return false })
   712  			h.entries[idx] = e
   713  		}
   714  	}
   715  	h.cap = newcap
   716  	h.capMask = newMask
   717  	return nil
   718  }
   719  
   720  // Insert updates the given entry with the provided hash value, payload value and memo index.
   721  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
   722  func (h *Int16HashTable) Insert(e *entryInt16, v uint64, val int16, memoIdx int32) error {
   723  	e.h = h.fixHash(v)
   724  	e.payload.val = val
   725  	e.payload.memoIdx = memoIdx
   726  	h.size++
   727  
   728  	if h.needUpsize() {
   729  		h.upsize(h.cap * uint64(loadFactor) * 2)
   730  	}
   731  	return nil
   732  }
   733  
   734  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
   735  // a valid entry being one which has had a value inserted into it.
   736  func (h *Int16HashTable) VisitEntries(visit func(*entryInt16)) {
   737  	for _, e := range h.entries {
   738  		if e.Valid() {
   739  			visit(&e)
   740  		}
   741  	}
   742  }
   743  
// Int16MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Int16MemoTable struct {
	tbl     *Int16HashTable // backing hash table holding the non-null values
	nullIdx int32           // memo index of the null entry, or KeyNotFound if none was inserted
}
   751  
   752  // NewInt16MemoTable returns a new memotable with num entries pre-allocated to reduce further
   753  // allocations when inserting.
   754  func NewInt16MemoTable(num int64) *Int16MemoTable {
   755  	return &Int16MemoTable{tbl: NewInt16HashTable(uint64(num)), nullIdx: KeyNotFound}
   756  }
   757  
   758  func (Int16MemoTable) TypeTraits() TypeTraits {
   759  	return arrow.Int16Traits
   760  }
   761  
   762  // Reset allows this table to be re-used by dumping all the data currently in the table.
   763  func (s *Int16MemoTable) Reset() {
   764  	s.tbl.Reset(32)
   765  	s.nullIdx = KeyNotFound
   766  }
   767  
   768  // Size returns the current number of inserted elements into the table including if a null
   769  // has been inserted.
   770  func (s *Int16MemoTable) Size() int {
   771  	sz := int(s.tbl.size)
   772  	if _, ok := s.GetNull(); ok {
   773  		sz++
   774  	}
   775  	return sz
   776  }
   777  
   778  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
   779  // that will be true if found and false if not.
   780  func (s *Int16MemoTable) GetNull() (int, bool) {
   781  	return int(s.nullIdx), s.nullIdx != KeyNotFound
   782  }
   783  
   784  // GetOrInsertNull will return the index of the null entry or insert a null entry
   785  // if one currently doesn't exist. The found value will be true if there was already
   786  // a null in the table, and false if it inserted one.
   787  func (s *Int16MemoTable) GetOrInsertNull() (idx int, found bool) {
   788  	idx, found = s.GetNull()
   789  	if !found {
   790  		idx = s.Size()
   791  		s.nullIdx = int32(idx)
   792  	}
   793  	return
   794  }
   795  
   796  // CopyValues will copy the values from the memo table out into the passed in slice
   797  // which must be of the appropriate type.
   798  func (s *Int16MemoTable) CopyValues(out interface{}) {
   799  	s.CopyValuesSubset(0, out)
   800  }
   801  
   802  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
   803  // at the provided start index
   804  func (s *Int16MemoTable) CopyValuesSubset(start int, out interface{}) {
   805  	s.tbl.CopyValuesSubset(start, out.([]int16))
   806  }
   807  
   808  func (s *Int16MemoTable) WriteOut(out []byte) {
   809  	s.tbl.CopyValues(arrow.Int16Traits.CastFromBytes(out))
   810  }
   811  
   812  func (s *Int16MemoTable) WriteOutSubset(start int, out []byte) {
   813  	s.tbl.CopyValuesSubset(start, arrow.Int16Traits.CastFromBytes(out))
   814  }
   815  
   816  func (s *Int16MemoTable) WriteOutLE(out []byte) {
   817  	s.tbl.WriteOut(out)
   818  }
   819  
   820  func (s *Int16MemoTable) WriteOutSubsetLE(start int, out []byte) {
   821  	s.tbl.WriteOutSubset(start, out)
   822  }
   823  
   824  // Get returns the index of the requested value in the hash table or KeyNotFound
   825  // along with a boolean indicating if it was found or not.
   826  func (s *Int16MemoTable) Get(val interface{}) (int, bool) {
   827  
   828  	h := hashInt(uint64(val.(int16)), 0)
   829  	if e, ok := s.tbl.Lookup(h, func(v int16) bool { return val.(int16) == v }); ok {
   830  		return int(e.payload.memoIdx), ok
   831  	}
   832  	return KeyNotFound, false
   833  }
   834  
   835  // GetOrInsert will return the index of the specified value in the table, or insert the
   836  // value into the table and return the new index. found indicates whether or not it already
   837  // existed in the table (true) or was inserted by this call (false).
   838  func (s *Int16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
   839  
   840  	h := hashInt(uint64(val.(int16)), 0)
   841  	e, ok := s.tbl.Lookup(h, func(v int16) bool {
   842  		return val.(int16) == v
   843  	})
   844  
   845  	if ok {
   846  		idx = int(e.payload.memoIdx)
   847  		found = true
   848  	} else {
   849  		idx = s.Size()
   850  		s.tbl.Insert(e, h, val.(int16), int32(idx))
   851  	}
   852  	return
   853  }
   854  
// GetOrInsertBytes is unimplemented for this memo table and always panics
// with "unimplemented".
func (s *Int16MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
   859  
// payloadUint16 is the data stored next to a hash in a Uint16HashTable slot.
type payloadUint16 struct {
	val     uint16 // the memoized value
	memoIdx int32  // memo index supplied to Insert for this value
}
   864  
// entryUint16 is one slot of a Uint16HashTable: a stored hash and its payload.
type entryUint16 struct {
	h       uint64        // stored hash; equals sentinel while the slot is unused (see Valid)
	payload payloadUint16 // value and memo index held by this slot
}
   869  
   870  func (e entryUint16) Valid() bool { return e.h != sentinel }
   871  
// Uint16HashTable is a hashtable specifically for uint16 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
type Uint16HashTable struct {
	cap     uint64 // number of slots in entries
	capMask uint64 // cap - 1; masks a hash down to a slot index
	size    uint64 // number of values inserted so far

	entries []entryUint16 // slot storage, allocated with length cap
}
   882  
   883  // NewUint16HashTable returns a new hash table for uint16 values
   884  // initialized with the passed in capacity or 32 whichever is larger.
   885  func NewUint16HashTable(cap uint64) *Uint16HashTable {
   886  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   887  	ret := &Uint16HashTable{cap: initCap, capMask: initCap - 1, size: 0}
   888  	ret.entries = make([]entryUint16, initCap)
   889  	return ret
   890  }
   891  
   892  // Reset drops all of the values in this hash table and re-initializes it
   893  // with the specified initial capacity as if by calling New, but without having
   894  // to reallocate the object.
   895  func (h *Uint16HashTable) Reset(cap uint64) {
   896  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   897  	h.capMask = h.cap - 1
   898  	h.size = 0
   899  	h.entries = make([]entryUint16, h.cap)
   900  }
   901  
   902  // CopyValues is used for copying the values out of the hash table into the
   903  // passed in slice, in the order that they were first inserted
   904  func (h *Uint16HashTable) CopyValues(out []uint16) {
   905  	h.CopyValuesSubset(0, out)
   906  }
   907  
   908  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
   909  // with the value at start, in the order that they were inserted.
   910  func (h *Uint16HashTable) CopyValuesSubset(start int, out []uint16) {
   911  	h.VisitEntries(func(e *entryUint16) {
   912  		idx := e.payload.memoIdx - int32(start)
   913  		if idx >= 0 {
   914  			out[idx] = e.payload.val
   915  		}
   916  	})
   917  }
   918  
   919  func (h *Uint16HashTable) WriteOut(out []byte) {
   920  	h.WriteOutSubset(0, out)
   921  }
   922  
   923  func (h *Uint16HashTable) WriteOutSubset(start int, out []byte) {
   924  	data := arrow.Uint16Traits.CastFromBytes(out)
   925  	h.VisitEntries(func(e *entryUint16) {
   926  		idx := e.payload.memoIdx - int32(start)
   927  		if idx >= 0 {
   928  			data[idx] = utils.ToLEUint16(e.payload.val)
   929  		}
   930  	})
   931  }
   932  
   933  func (h *Uint16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
   934  
   935  func (Uint16HashTable) fixHash(v uint64) uint64 {
   936  	if v == sentinel {
   937  		return 42
   938  	}
   939  	return v
   940  }
   941  
   942  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
   943  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
   944  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
   945  func (h *Uint16HashTable) Lookup(v uint64, cmp func(uint16) bool) (*entryUint16, bool) {
   946  	idx, ok := h.lookup(v, h.capMask, cmp)
   947  	return &h.entries[idx], ok
   948  }
   949  
   950  func (h *Uint16HashTable) lookup(v uint64, szMask uint64, cmp func(uint16) bool) (uint64, bool) {
   951  	const perturbShift uint8 = 5
   952  
   953  	var (
   954  		idx     uint64
   955  		perturb uint64
   956  		e       *entryUint16
   957  	)
   958  
   959  	v = h.fixHash(v)
   960  	idx = v & szMask
   961  	perturb = (v >> uint64(perturbShift)) + 1
   962  
   963  	for {
   964  		e = &h.entries[idx]
   965  		if e.h == v && cmp(e.payload.val) {
   966  			return idx, true
   967  		}
   968  
   969  		if e.h == sentinel {
   970  			return idx, false
   971  		}
   972  
   973  		// perturbation logic inspired from CPython's set/dict object
   974  		// the goal is that all 64 bits of unmasked hash value eventually
   975  		// participate int he probing sequence, to minimize clustering
   976  		idx = (idx + perturb) & szMask
   977  		perturb = (perturb >> uint64(perturbShift)) + 1
   978  	}
   979  }
   980  
   981  func (h *Uint16HashTable) upsize(newcap uint64) error {
   982  	newMask := newcap - 1
   983  
   984  	oldEntries := h.entries
   985  	h.entries = make([]entryUint16, newcap)
   986  	for _, e := range oldEntries {
   987  		if e.Valid() {
   988  			idx, _ := h.lookup(e.h, newMask, func(uint16) bool { return false })
   989  			h.entries[idx] = e
   990  		}
   991  	}
   992  	h.cap = newcap
   993  	h.capMask = newMask
   994  	return nil
   995  }
   996  
   997  // Insert updates the given entry with the provided hash value, payload value and memo index.
   998  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
   999  func (h *Uint16HashTable) Insert(e *entryUint16, v uint64, val uint16, memoIdx int32) error {
  1000  	e.h = h.fixHash(v)
  1001  	e.payload.val = val
  1002  	e.payload.memoIdx = memoIdx
  1003  	h.size++
  1004  
  1005  	if h.needUpsize() {
  1006  		h.upsize(h.cap * uint64(loadFactor) * 2)
  1007  	}
  1008  	return nil
  1009  }
  1010  
  1011  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  1012  // a valid entry being one which has had a value inserted into it.
  1013  func (h *Uint16HashTable) VisitEntries(visit func(*entryUint16)) {
  1014  	for _, e := range h.entries {
  1015  		if e.Valid() {
  1016  			visit(&e)
  1017  		}
  1018  	}
  1019  }
  1020  
// Uint16MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Uint16MemoTable struct {
	tbl     *Uint16HashTable // backing hash table holding the non-null values
	nullIdx int32            // memo index of the null entry, or KeyNotFound if none was inserted
}
  1028  
  1029  // NewUint16MemoTable returns a new memotable with num entries pre-allocated to reduce further
  1030  // allocations when inserting.
  1031  func NewUint16MemoTable(num int64) *Uint16MemoTable {
  1032  	return &Uint16MemoTable{tbl: NewUint16HashTable(uint64(num)), nullIdx: KeyNotFound}
  1033  }
  1034  
// TypeTraits returns arrow.Uint16Traits, the traits value for the uint16
// values stored in this table.
func (Uint16MemoTable) TypeTraits() TypeTraits {
	return arrow.Uint16Traits
}
  1038  
  1039  // Reset allows this table to be re-used by dumping all the data currently in the table.
  1040  func (s *Uint16MemoTable) Reset() {
  1041  	s.tbl.Reset(32)
  1042  	s.nullIdx = KeyNotFound
  1043  }
  1044  
  1045  // Size returns the current number of inserted elements into the table including if a null
  1046  // has been inserted.
  1047  func (s *Uint16MemoTable) Size() int {
  1048  	sz := int(s.tbl.size)
  1049  	if _, ok := s.GetNull(); ok {
  1050  		sz++
  1051  	}
  1052  	return sz
  1053  }
  1054  
  1055  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  1056  // that will be true if found and false if not.
  1057  func (s *Uint16MemoTable) GetNull() (int, bool) {
  1058  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  1059  }
  1060  
  1061  // GetOrInsertNull will return the index of the null entry or insert a null entry
  1062  // if one currently doesn't exist. The found value will be true if there was already
  1063  // a null in the table, and false if it inserted one.
  1064  func (s *Uint16MemoTable) GetOrInsertNull() (idx int, found bool) {
  1065  	idx, found = s.GetNull()
  1066  	if !found {
  1067  		idx = s.Size()
  1068  		s.nullIdx = int32(idx)
  1069  	}
  1070  	return
  1071  }
  1072  
  1073  // CopyValues will copy the values from the memo table out into the passed in slice
  1074  // which must be of the appropriate type.
  1075  func (s *Uint16MemoTable) CopyValues(out interface{}) {
  1076  	s.CopyValuesSubset(0, out)
  1077  }
  1078  
  1079  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  1080  // at the provided start index
  1081  func (s *Uint16MemoTable) CopyValuesSubset(start int, out interface{}) {
  1082  	s.tbl.CopyValuesSubset(start, out.([]uint16))
  1083  }
  1084  
  1085  func (s *Uint16MemoTable) WriteOut(out []byte) {
  1086  	s.tbl.CopyValues(arrow.Uint16Traits.CastFromBytes(out))
  1087  }
  1088  
  1089  func (s *Uint16MemoTable) WriteOutSubset(start int, out []byte) {
  1090  	s.tbl.CopyValuesSubset(start, arrow.Uint16Traits.CastFromBytes(out))
  1091  }
  1092  
  1093  func (s *Uint16MemoTable) WriteOutLE(out []byte) {
  1094  	s.tbl.WriteOut(out)
  1095  }
  1096  
// WriteOutSubsetLE is like WriteOutLE but only writes the values whose memo
// index is at least start; it delegates to the backing table's WriteOutSubset,
// which passes each value through utils.ToLEUint16.
func (s *Uint16MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}
  1100  
  1101  // Get returns the index of the requested value in the hash table or KeyNotFound
  1102  // along with a boolean indicating if it was found or not.
  1103  func (s *Uint16MemoTable) Get(val interface{}) (int, bool) {
  1104  
  1105  	h := hashInt(uint64(val.(uint16)), 0)
  1106  	if e, ok := s.tbl.Lookup(h, func(v uint16) bool { return val.(uint16) == v }); ok {
  1107  		return int(e.payload.memoIdx), ok
  1108  	}
  1109  	return KeyNotFound, false
  1110  }
  1111  
  1112  // GetOrInsert will return the index of the specified value in the table, or insert the
  1113  // value into the table and return the new index. found indicates whether or not it already
  1114  // existed in the table (true) or was inserted by this call (false).
  1115  func (s *Uint16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  1116  
  1117  	h := hashInt(uint64(val.(uint16)), 0)
  1118  	e, ok := s.tbl.Lookup(h, func(v uint16) bool {
  1119  		return val.(uint16) == v
  1120  	})
  1121  
  1122  	if ok {
  1123  		idx = int(e.payload.memoIdx)
  1124  		found = true
  1125  	} else {
  1126  		idx = s.Size()
  1127  		s.tbl.Insert(e, h, val.(uint16), int32(idx))
  1128  	}
  1129  	return
  1130  }
  1131  
// GetOrInsertBytes is unimplemented for this memo table and always panics
// with "unimplemented".
func (s *Uint16MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
  1136  
// payloadInt32 is the data stored next to a hash in an Int32HashTable slot.
type payloadInt32 struct {
	val     int32 // the memoized value
	memoIdx int32 // memo index supplied to Insert for this value
}
  1141  
// entryInt32 is one slot of an Int32HashTable: a stored hash and its payload.
type entryInt32 struct {
	h       uint64       // stored hash; equals sentinel while the slot is unused (see Valid)
	payload payloadInt32 // value and memo index held by this slot
}
  1146  
  1147  func (e entryInt32) Valid() bool { return e.h != sentinel }
  1148  
// Int32HashTable is a hashtable specifically for int32 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
type Int32HashTable struct {
	cap     uint64 // number of slots in entries
	capMask uint64 // cap - 1; masks a hash down to a slot index
	size    uint64 // number of values inserted so far

	entries []entryInt32 // slot storage, allocated with length cap
}
  1159  
  1160  // NewInt32HashTable returns a new hash table for int32 values
  1161  // initialized with the passed in capacity or 32 whichever is larger.
  1162  func NewInt32HashTable(cap uint64) *Int32HashTable {
  1163  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1164  	ret := &Int32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  1165  	ret.entries = make([]entryInt32, initCap)
  1166  	return ret
  1167  }
  1168  
  1169  // Reset drops all of the values in this hash table and re-initializes it
  1170  // with the specified initial capacity as if by calling New, but without having
  1171  // to reallocate the object.
  1172  func (h *Int32HashTable) Reset(cap uint64) {
  1173  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1174  	h.capMask = h.cap - 1
  1175  	h.size = 0
  1176  	h.entries = make([]entryInt32, h.cap)
  1177  }
  1178  
  1179  // CopyValues is used for copying the values out of the hash table into the
  1180  // passed in slice, in the order that they were first inserted
  1181  func (h *Int32HashTable) CopyValues(out []int32) {
  1182  	h.CopyValuesSubset(0, out)
  1183  }
  1184  
  1185  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  1186  // with the value at start, in the order that they were inserted.
  1187  func (h *Int32HashTable) CopyValuesSubset(start int, out []int32) {
  1188  	h.VisitEntries(func(e *entryInt32) {
  1189  		idx := e.payload.memoIdx - int32(start)
  1190  		if idx >= 0 {
  1191  			out[idx] = e.payload.val
  1192  		}
  1193  	})
  1194  }
  1195  
  1196  func (h *Int32HashTable) WriteOut(out []byte) {
  1197  	h.WriteOutSubset(0, out)
  1198  }
  1199  
  1200  func (h *Int32HashTable) WriteOutSubset(start int, out []byte) {
  1201  	data := arrow.Int32Traits.CastFromBytes(out)
  1202  	h.VisitEntries(func(e *entryInt32) {
  1203  		idx := e.payload.memoIdx - int32(start)
  1204  		if idx >= 0 {
  1205  			data[idx] = utils.ToLEInt32(e.payload.val)
  1206  		}
  1207  	})
  1208  }
  1209  
  1210  func (h *Int32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  1211  
  1212  func (Int32HashTable) fixHash(v uint64) uint64 {
  1213  	if v == sentinel {
  1214  		return 42
  1215  	}
  1216  	return v
  1217  }
  1218  
  1219  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  1220  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  1221  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  1222  func (h *Int32HashTable) Lookup(v uint64, cmp func(int32) bool) (*entryInt32, bool) {
  1223  	idx, ok := h.lookup(v, h.capMask, cmp)
  1224  	return &h.entries[idx], ok
  1225  }
  1226  
  1227  func (h *Int32HashTable) lookup(v uint64, szMask uint64, cmp func(int32) bool) (uint64, bool) {
  1228  	const perturbShift uint8 = 5
  1229  
  1230  	var (
  1231  		idx     uint64
  1232  		perturb uint64
  1233  		e       *entryInt32
  1234  	)
  1235  
  1236  	v = h.fixHash(v)
  1237  	idx = v & szMask
  1238  	perturb = (v >> uint64(perturbShift)) + 1
  1239  
  1240  	for {
  1241  		e = &h.entries[idx]
  1242  		if e.h == v && cmp(e.payload.val) {
  1243  			return idx, true
  1244  		}
  1245  
  1246  		if e.h == sentinel {
  1247  			return idx, false
  1248  		}
  1249  
  1250  		// perturbation logic inspired from CPython's set/dict object
  1251  		// the goal is that all 64 bits of unmasked hash value eventually
  1252  		// participate int he probing sequence, to minimize clustering
  1253  		idx = (idx + perturb) & szMask
  1254  		perturb = (perturb >> uint64(perturbShift)) + 1
  1255  	}
  1256  }
  1257  
  1258  func (h *Int32HashTable) upsize(newcap uint64) error {
  1259  	newMask := newcap - 1
  1260  
  1261  	oldEntries := h.entries
  1262  	h.entries = make([]entryInt32, newcap)
  1263  	for _, e := range oldEntries {
  1264  		if e.Valid() {
  1265  			idx, _ := h.lookup(e.h, newMask, func(int32) bool { return false })
  1266  			h.entries[idx] = e
  1267  		}
  1268  	}
  1269  	h.cap = newcap
  1270  	h.capMask = newMask
  1271  	return nil
  1272  }
  1273  
  1274  // Insert updates the given entry with the provided hash value, payload value and memo index.
  1275  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  1276  func (h *Int32HashTable) Insert(e *entryInt32, v uint64, val int32, memoIdx int32) error {
  1277  	e.h = h.fixHash(v)
  1278  	e.payload.val = val
  1279  	e.payload.memoIdx = memoIdx
  1280  	h.size++
  1281  
  1282  	if h.needUpsize() {
  1283  		h.upsize(h.cap * uint64(loadFactor) * 2)
  1284  	}
  1285  	return nil
  1286  }
  1287  
  1288  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  1289  // a valid entry being one which has had a value inserted into it.
  1290  func (h *Int32HashTable) VisitEntries(visit func(*entryInt32)) {
  1291  	for _, e := range h.entries {
  1292  		if e.Valid() {
  1293  			visit(&e)
  1294  		}
  1295  	}
  1296  }
  1297  
// Int32MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Int32MemoTable struct {
	tbl     *Int32HashTable // backing hash table holding the non-null values
	nullIdx int32           // memo index of the null entry, or KeyNotFound if none was inserted
}
  1305  
  1306  // NewInt32MemoTable returns a new memotable with num entries pre-allocated to reduce further
  1307  // allocations when inserting.
  1308  func NewInt32MemoTable(num int64) *Int32MemoTable {
  1309  	return &Int32MemoTable{tbl: NewInt32HashTable(uint64(num)), nullIdx: KeyNotFound}
  1310  }
  1311  
// TypeTraits returns arrow.Int32Traits, the traits value for the int32
// values stored in this table.
func (Int32MemoTable) TypeTraits() TypeTraits {
	return arrow.Int32Traits
}
  1315  
  1316  // Reset allows this table to be re-used by dumping all the data currently in the table.
  1317  func (s *Int32MemoTable) Reset() {
  1318  	s.tbl.Reset(32)
  1319  	s.nullIdx = KeyNotFound
  1320  }
  1321  
  1322  // Size returns the current number of inserted elements into the table including if a null
  1323  // has been inserted.
  1324  func (s *Int32MemoTable) Size() int {
  1325  	sz := int(s.tbl.size)
  1326  	if _, ok := s.GetNull(); ok {
  1327  		sz++
  1328  	}
  1329  	return sz
  1330  }
  1331  
  1332  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  1333  // that will be true if found and false if not.
  1334  func (s *Int32MemoTable) GetNull() (int, bool) {
  1335  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  1336  }
  1337  
  1338  // GetOrInsertNull will return the index of the null entry or insert a null entry
  1339  // if one currently doesn't exist. The found value will be true if there was already
  1340  // a null in the table, and false if it inserted one.
  1341  func (s *Int32MemoTable) GetOrInsertNull() (idx int, found bool) {
  1342  	idx, found = s.GetNull()
  1343  	if !found {
  1344  		idx = s.Size()
  1345  		s.nullIdx = int32(idx)
  1346  	}
  1347  	return
  1348  }
  1349  
  1350  // CopyValues will copy the values from the memo table out into the passed in slice
  1351  // which must be of the appropriate type.
  1352  func (s *Int32MemoTable) CopyValues(out interface{}) {
  1353  	s.CopyValuesSubset(0, out)
  1354  }
  1355  
  1356  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  1357  // at the provided start index
  1358  func (s *Int32MemoTable) CopyValuesSubset(start int, out interface{}) {
  1359  	s.tbl.CopyValuesSubset(start, out.([]int32))
  1360  }
  1361  
  1362  func (s *Int32MemoTable) WriteOut(out []byte) {
  1363  	s.tbl.CopyValues(arrow.Int32Traits.CastFromBytes(out))
  1364  }
  1365  
  1366  func (s *Int32MemoTable) WriteOutSubset(start int, out []byte) {
  1367  	s.tbl.CopyValuesSubset(start, arrow.Int32Traits.CastFromBytes(out))
  1368  }
  1369  
  1370  func (s *Int32MemoTable) WriteOutLE(out []byte) {
  1371  	s.tbl.WriteOut(out)
  1372  }
  1373  
// WriteOutSubsetLE is like WriteOutLE but only writes the values whose memo
// index is at least start; it delegates to the backing table's WriteOutSubset,
// which passes each value through utils.ToLEInt32.
func (s *Int32MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}
  1377  
  1378  // Get returns the index of the requested value in the hash table or KeyNotFound
  1379  // along with a boolean indicating if it was found or not.
  1380  func (s *Int32MemoTable) Get(val interface{}) (int, bool) {
  1381  
  1382  	h := hashInt(uint64(val.(int32)), 0)
  1383  	if e, ok := s.tbl.Lookup(h, func(v int32) bool { return val.(int32) == v }); ok {
  1384  		return int(e.payload.memoIdx), ok
  1385  	}
  1386  	return KeyNotFound, false
  1387  }
  1388  
  1389  // GetOrInsert will return the index of the specified value in the table, or insert the
  1390  // value into the table and return the new index. found indicates whether or not it already
  1391  // existed in the table (true) or was inserted by this call (false).
  1392  func (s *Int32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  1393  
  1394  	h := hashInt(uint64(val.(int32)), 0)
  1395  	e, ok := s.tbl.Lookup(h, func(v int32) bool {
  1396  		return val.(int32) == v
  1397  	})
  1398  
  1399  	if ok {
  1400  		idx = int(e.payload.memoIdx)
  1401  		found = true
  1402  	} else {
  1403  		idx = s.Size()
  1404  		s.tbl.Insert(e, h, val.(int32), int32(idx))
  1405  	}
  1406  	return
  1407  }
  1408  
// GetOrInsertBytes is unimplemented for this memo table and always panics
// with "unimplemented".
func (s *Int32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
  1413  
// payloadInt64 is the data stored next to a hash in an Int64HashTable slot.
type payloadInt64 struct {
	val     int64 // the memoized value
	memoIdx int32 // memo index supplied to Insert for this value
}
  1418  
// entryInt64 is one slot of an Int64HashTable: a stored hash and its payload.
type entryInt64 struct {
	h       uint64       // stored hash; equals sentinel while the slot is unused (see Valid)
	payload payloadInt64 // value and memo index held by this slot
}
  1423  
  1424  func (e entryInt64) Valid() bool { return e.h != sentinel }
  1425  
// Int64HashTable is a hashtable specifically for int64 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
type Int64HashTable struct {
	cap     uint64 // number of slots in entries
	capMask uint64 // cap - 1; masks a hash down to a slot index
	size    uint64 // number of values inserted so far

	entries []entryInt64 // slot storage, allocated with length cap
}
  1436  
  1437  // NewInt64HashTable returns a new hash table for int64 values
  1438  // initialized with the passed in capacity or 32 whichever is larger.
  1439  func NewInt64HashTable(cap uint64) *Int64HashTable {
  1440  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1441  	ret := &Int64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  1442  	ret.entries = make([]entryInt64, initCap)
  1443  	return ret
  1444  }
  1445  
  1446  // Reset drops all of the values in this hash table and re-initializes it
  1447  // with the specified initial capacity as if by calling New, but without having
  1448  // to reallocate the object.
  1449  func (h *Int64HashTable) Reset(cap uint64) {
  1450  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1451  	h.capMask = h.cap - 1
  1452  	h.size = 0
  1453  	h.entries = make([]entryInt64, h.cap)
  1454  }
  1455  
  1456  // CopyValues is used for copying the values out of the hash table into the
  1457  // passed in slice, in the order that they were first inserted
  1458  func (h *Int64HashTable) CopyValues(out []int64) {
  1459  	h.CopyValuesSubset(0, out)
  1460  }
  1461  
  1462  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  1463  // with the value at start, in the order that they were inserted.
  1464  func (h *Int64HashTable) CopyValuesSubset(start int, out []int64) {
  1465  	h.VisitEntries(func(e *entryInt64) {
  1466  		idx := e.payload.memoIdx - int32(start)
  1467  		if idx >= 0 {
  1468  			out[idx] = e.payload.val
  1469  		}
  1470  	})
  1471  }
  1472  
  1473  func (h *Int64HashTable) WriteOut(out []byte) {
  1474  	h.WriteOutSubset(0, out)
  1475  }
  1476  
  1477  func (h *Int64HashTable) WriteOutSubset(start int, out []byte) {
  1478  	data := arrow.Int64Traits.CastFromBytes(out)
  1479  	h.VisitEntries(func(e *entryInt64) {
  1480  		idx := e.payload.memoIdx - int32(start)
  1481  		if idx >= 0 {
  1482  			data[idx] = utils.ToLEInt64(e.payload.val)
  1483  		}
  1484  	})
  1485  }
  1486  
  1487  func (h *Int64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  1488  
  1489  func (Int64HashTable) fixHash(v uint64) uint64 {
  1490  	if v == sentinel {
  1491  		return 42
  1492  	}
  1493  	return v
  1494  }
  1495  
  1496  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  1497  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  1498  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  1499  func (h *Int64HashTable) Lookup(v uint64, cmp func(int64) bool) (*entryInt64, bool) {
  1500  	idx, ok := h.lookup(v, h.capMask, cmp)
  1501  	return &h.entries[idx], ok
  1502  }
  1503  
  1504  func (h *Int64HashTable) lookup(v uint64, szMask uint64, cmp func(int64) bool) (uint64, bool) {
  1505  	const perturbShift uint8 = 5
  1506  
  1507  	var (
  1508  		idx     uint64
  1509  		perturb uint64
  1510  		e       *entryInt64
  1511  	)
  1512  
  1513  	v = h.fixHash(v)
  1514  	idx = v & szMask
  1515  	perturb = (v >> uint64(perturbShift)) + 1
  1516  
  1517  	for {
  1518  		e = &h.entries[idx]
  1519  		if e.h == v && cmp(e.payload.val) {
  1520  			return idx, true
  1521  		}
  1522  
  1523  		if e.h == sentinel {
  1524  			return idx, false
  1525  		}
  1526  
  1527  		// perturbation logic inspired from CPython's set/dict object
  1528  		// the goal is that all 64 bits of unmasked hash value eventually
  1529  		// participate int he probing sequence, to minimize clustering
  1530  		idx = (idx + perturb) & szMask
  1531  		perturb = (perturb >> uint64(perturbShift)) + 1
  1532  	}
  1533  }
  1534  
  1535  func (h *Int64HashTable) upsize(newcap uint64) error {
  1536  	newMask := newcap - 1
  1537  
  1538  	oldEntries := h.entries
  1539  	h.entries = make([]entryInt64, newcap)
  1540  	for _, e := range oldEntries {
  1541  		if e.Valid() {
  1542  			idx, _ := h.lookup(e.h, newMask, func(int64) bool { return false })
  1543  			h.entries[idx] = e
  1544  		}
  1545  	}
  1546  	h.cap = newcap
  1547  	h.capMask = newMask
  1548  	return nil
  1549  }
  1550  
  1551  // Insert updates the given entry with the provided hash value, payload value and memo index.
  1552  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  1553  func (h *Int64HashTable) Insert(e *entryInt64, v uint64, val int64, memoIdx int32) error {
  1554  	e.h = h.fixHash(v)
  1555  	e.payload.val = val
  1556  	e.payload.memoIdx = memoIdx
  1557  	h.size++
  1558  
  1559  	if h.needUpsize() {
  1560  		h.upsize(h.cap * uint64(loadFactor) * 2)
  1561  	}
  1562  	return nil
  1563  }
  1564  
  1565  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  1566  // a valid entry being one which has had a value inserted into it.
  1567  func (h *Int64HashTable) VisitEntries(visit func(*entryInt64)) {
  1568  	for _, e := range h.entries {
  1569  		if e.Valid() {
  1570  			visit(&e)
  1571  		}
  1572  	}
  1573  }
  1574  
// Int64MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Int64MemoTable struct {
	tbl     *Int64HashTable // backing hash table holding the non-null values
	nullIdx int32           // memo index of the null entry, or KeyNotFound if none was inserted
}
  1582  
  1583  // NewInt64MemoTable returns a new memotable with num entries pre-allocated to reduce further
  1584  // allocations when inserting.
  1585  func NewInt64MemoTable(num int64) *Int64MemoTable {
  1586  	return &Int64MemoTable{tbl: NewInt64HashTable(uint64(num)), nullIdx: KeyNotFound}
  1587  }
  1588  
// TypeTraits returns arrow.Int64Traits, the traits value for the int64
// values stored in this table.
func (Int64MemoTable) TypeTraits() TypeTraits {
	return arrow.Int64Traits
}
  1592  
  1593  // Reset allows this table to be re-used by dumping all the data currently in the table.
  1594  func (s *Int64MemoTable) Reset() {
  1595  	s.tbl.Reset(32)
  1596  	s.nullIdx = KeyNotFound
  1597  }
  1598  
  1599  // Size returns the current number of inserted elements into the table including if a null
  1600  // has been inserted.
  1601  func (s *Int64MemoTable) Size() int {
  1602  	sz := int(s.tbl.size)
  1603  	if _, ok := s.GetNull(); ok {
  1604  		sz++
  1605  	}
  1606  	return sz
  1607  }
  1608  
  1609  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  1610  // that will be true if found and false if not.
  1611  func (s *Int64MemoTable) GetNull() (int, bool) {
  1612  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  1613  }
  1614  
  1615  // GetOrInsertNull will return the index of the null entry or insert a null entry
  1616  // if one currently doesn't exist. The found value will be true if there was already
  1617  // a null in the table, and false if it inserted one.
  1618  func (s *Int64MemoTable) GetOrInsertNull() (idx int, found bool) {
  1619  	idx, found = s.GetNull()
  1620  	if !found {
  1621  		idx = s.Size()
  1622  		s.nullIdx = int32(idx)
  1623  	}
  1624  	return
  1625  }
  1626  
  1627  // CopyValues will copy the values from the memo table out into the passed in slice
  1628  // which must be of the appropriate type.
  1629  func (s *Int64MemoTable) CopyValues(out interface{}) {
  1630  	s.CopyValuesSubset(0, out)
  1631  }
  1632  
  1633  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  1634  // at the provided start index
  1635  func (s *Int64MemoTable) CopyValuesSubset(start int, out interface{}) {
  1636  	s.tbl.CopyValuesSubset(start, out.([]int64))
  1637  }
  1638  
  1639  func (s *Int64MemoTable) WriteOut(out []byte) {
  1640  	s.tbl.CopyValues(arrow.Int64Traits.CastFromBytes(out))
  1641  }
  1642  
  1643  func (s *Int64MemoTable) WriteOutSubset(start int, out []byte) {
  1644  	s.tbl.CopyValuesSubset(start, arrow.Int64Traits.CastFromBytes(out))
  1645  }
  1646  
  1647  func (s *Int64MemoTable) WriteOutLE(out []byte) {
  1648  	s.tbl.WriteOut(out)
  1649  }
  1650  
// WriteOutSubsetLE is like WriteOutLE but only writes the values whose memo
// index is at least start; it delegates to the backing table's WriteOutSubset,
// which passes each value through utils.ToLEInt64.
func (s *Int64MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}
  1654  
  1655  // Get returns the index of the requested value in the hash table or KeyNotFound
  1656  // along with a boolean indicating if it was found or not.
  1657  func (s *Int64MemoTable) Get(val interface{}) (int, bool) {
  1658  
  1659  	h := hashInt(uint64(val.(int64)), 0)
  1660  	if e, ok := s.tbl.Lookup(h, func(v int64) bool { return val.(int64) == v }); ok {
  1661  		return int(e.payload.memoIdx), ok
  1662  	}
  1663  	return KeyNotFound, false
  1664  }
  1665  
  1666  // GetOrInsert will return the index of the specified value in the table, or insert the
  1667  // value into the table and return the new index. found indicates whether or not it already
  1668  // existed in the table (true) or was inserted by this call (false).
  1669  func (s *Int64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  1670  
  1671  	h := hashInt(uint64(val.(int64)), 0)
  1672  	e, ok := s.tbl.Lookup(h, func(v int64) bool {
  1673  		return val.(int64) == v
  1674  	})
  1675  
  1676  	if ok {
  1677  		idx = int(e.payload.memoIdx)
  1678  		found = true
  1679  	} else {
  1680  		idx = s.Size()
  1681  		s.tbl.Insert(e, h, val.(int64), int32(idx))
  1682  	}
  1683  	return
  1684  }
  1685  
// GetOrInsertBytes is unimplemented for this memo table and always panics
// with "unimplemented".
func (s *Int64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
  1690  
// payloadUint32 is the data stored next to a hash in a Uint32HashTable slot.
type payloadUint32 struct {
	val     uint32 // the memoized value
	memoIdx int32  // memo index recorded for this value
}
  1695  
// entryUint32 is one slot of a Uint32HashTable: a stored hash and its payload.
type entryUint32 struct {
	h       uint64        // stored hash; equals sentinel while the slot is unused (see Valid)
	payload payloadUint32 // value and memo index held by this slot
}
  1700  
  1701  func (e entryUint32) Valid() bool { return e.h != sentinel }
  1702  
// Uint32HashTable is a hashtable specifically for uint32 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
type Uint32HashTable struct {
	cap     uint64 // number of slots allocated in entries
	capMask uint64 // cap - 1; masks a hash down to a slot index
	size    uint64 // count of values inserted

	entries []entryUint32 // slot storage
}
  1713  
  1714  // NewUint32HashTable returns a new hash table for uint32 values
  1715  // initialized with the passed in capacity or 32 whichever is larger.
  1716  func NewUint32HashTable(cap uint64) *Uint32HashTable {
  1717  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1718  	ret := &Uint32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  1719  	ret.entries = make([]entryUint32, initCap)
  1720  	return ret
  1721  }
  1722  
  1723  // Reset drops all of the values in this hash table and re-initializes it
  1724  // with the specified initial capacity as if by calling New, but without having
  1725  // to reallocate the object.
  1726  func (h *Uint32HashTable) Reset(cap uint64) {
  1727  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1728  	h.capMask = h.cap - 1
  1729  	h.size = 0
  1730  	h.entries = make([]entryUint32, h.cap)
  1731  }
  1732  
// CopyValues is used for copying the values out of the hash table into the
// passed in slice, in the order that they were first inserted.
// It is equivalent to CopyValuesSubset with a start of 0.
func (h *Uint32HashTable) CopyValues(out []uint32) {
	h.CopyValuesSubset(0, out)
}
  1738  
  1739  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  1740  // with the value at start, in the order that they were inserted.
  1741  func (h *Uint32HashTable) CopyValuesSubset(start int, out []uint32) {
  1742  	h.VisitEntries(func(e *entryUint32) {
  1743  		idx := e.payload.memoIdx - int32(start)
  1744  		if idx >= 0 {
  1745  			out[idx] = e.payload.val
  1746  		}
  1747  	})
  1748  }
  1749  
// WriteOut writes every value in the table into out, positioned by memo
// index. It is equivalent to WriteOutSubset with a start of 0.
func (h *Uint32HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}
  1753  
  1754  func (h *Uint32HashTable) WriteOutSubset(start int, out []byte) {
  1755  	data := arrow.Uint32Traits.CastFromBytes(out)
  1756  	h.VisitEntries(func(e *entryUint32) {
  1757  		idx := e.payload.memoIdx - int32(start)
  1758  		if idx >= 0 {
  1759  			data[idx] = utils.ToLEUint32(e.payload.val)
  1760  		}
  1761  	})
  1762  }
  1763  
  1764  func (h *Uint32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  1765  
  1766  func (Uint32HashTable) fixHash(v uint64) uint64 {
  1767  	if v == sentinel {
  1768  		return 42
  1769  	}
  1770  	return v
  1771  }
  1772  
  1773  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  1774  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  1775  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  1776  func (h *Uint32HashTable) Lookup(v uint64, cmp func(uint32) bool) (*entryUint32, bool) {
  1777  	idx, ok := h.lookup(v, h.capMask, cmp)
  1778  	return &h.entries[idx], ok
  1779  }
  1780  
  1781  func (h *Uint32HashTable) lookup(v uint64, szMask uint64, cmp func(uint32) bool) (uint64, bool) {
  1782  	const perturbShift uint8 = 5
  1783  
  1784  	var (
  1785  		idx     uint64
  1786  		perturb uint64
  1787  		e       *entryUint32
  1788  	)
  1789  
  1790  	v = h.fixHash(v)
  1791  	idx = v & szMask
  1792  	perturb = (v >> uint64(perturbShift)) + 1
  1793  
  1794  	for {
  1795  		e = &h.entries[idx]
  1796  		if e.h == v && cmp(e.payload.val) {
  1797  			return idx, true
  1798  		}
  1799  
  1800  		if e.h == sentinel {
  1801  			return idx, false
  1802  		}
  1803  
  1804  		// perturbation logic inspired from CPython's set/dict object
  1805  		// the goal is that all 64 bits of unmasked hash value eventually
  1806  		// participate int he probing sequence, to minimize clustering
  1807  		idx = (idx + perturb) & szMask
  1808  		perturb = (perturb >> uint64(perturbShift)) + 1
  1809  	}
  1810  }
  1811  
  1812  func (h *Uint32HashTable) upsize(newcap uint64) error {
  1813  	newMask := newcap - 1
  1814  
  1815  	oldEntries := h.entries
  1816  	h.entries = make([]entryUint32, newcap)
  1817  	for _, e := range oldEntries {
  1818  		if e.Valid() {
  1819  			idx, _ := h.lookup(e.h, newMask, func(uint32) bool { return false })
  1820  			h.entries[idx] = e
  1821  		}
  1822  	}
  1823  	h.cap = newcap
  1824  	h.capMask = newMask
  1825  	return nil
  1826  }
  1827  
  1828  // Insert updates the given entry with the provided hash value, payload value and memo index.
  1829  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  1830  func (h *Uint32HashTable) Insert(e *entryUint32, v uint64, val uint32, memoIdx int32) error {
  1831  	e.h = h.fixHash(v)
  1832  	e.payload.val = val
  1833  	e.payload.memoIdx = memoIdx
  1834  	h.size++
  1835  
  1836  	if h.needUpsize() {
  1837  		h.upsize(h.cap * uint64(loadFactor) * 2)
  1838  	}
  1839  	return nil
  1840  }
  1841  
  1842  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  1843  // a valid entry being one which has had a value inserted into it.
  1844  func (h *Uint32HashTable) VisitEntries(visit func(*entryUint32)) {
  1845  	for _, e := range h.entries {
  1846  		if e.Valid() {
  1847  			visit(&e)
  1848  		}
  1849  	}
  1850  }
  1851  
// Uint32MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Uint32MemoTable struct {
	tbl     *Uint32HashTable // hash table holding the non-null values
	nullIdx int32            // memo index of the null entry, KeyNotFound when absent
}
  1859  
  1860  // NewUint32MemoTable returns a new memotable with num entries pre-allocated to reduce further
  1861  // allocations when inserting.
  1862  func NewUint32MemoTable(num int64) *Uint32MemoTable {
  1863  	return &Uint32MemoTable{tbl: NewUint32HashTable(uint64(num)), nullIdx: KeyNotFound}
  1864  }
  1865  
// TypeTraits returns arrow.Uint32Traits, the traits value for the element
// type stored in this memo table.
func (Uint32MemoTable) TypeTraits() TypeTraits {
	return arrow.Uint32Traits
}
  1869  
  1870  // Reset allows this table to be re-used by dumping all the data currently in the table.
  1871  func (s *Uint32MemoTable) Reset() {
  1872  	s.tbl.Reset(32)
  1873  	s.nullIdx = KeyNotFound
  1874  }
  1875  
  1876  // Size returns the current number of inserted elements into the table including if a null
  1877  // has been inserted.
  1878  func (s *Uint32MemoTable) Size() int {
  1879  	sz := int(s.tbl.size)
  1880  	if _, ok := s.GetNull(); ok {
  1881  		sz++
  1882  	}
  1883  	return sz
  1884  }
  1885  
  1886  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  1887  // that will be true if found and false if not.
  1888  func (s *Uint32MemoTable) GetNull() (int, bool) {
  1889  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  1890  }
  1891  
  1892  // GetOrInsertNull will return the index of the null entry or insert a null entry
  1893  // if one currently doesn't exist. The found value will be true if there was already
  1894  // a null in the table, and false if it inserted one.
  1895  func (s *Uint32MemoTable) GetOrInsertNull() (idx int, found bool) {
  1896  	idx, found = s.GetNull()
  1897  	if !found {
  1898  		idx = s.Size()
  1899  		s.nullIdx = int32(idx)
  1900  	}
  1901  	return
  1902  }
  1903  
  1904  // CopyValues will copy the values from the memo table out into the passed in slice
  1905  // which must be of the appropriate type.
  1906  func (s *Uint32MemoTable) CopyValues(out interface{}) {
  1907  	s.CopyValuesSubset(0, out)
  1908  }
  1909  
  1910  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  1911  // at the provided start index
  1912  func (s *Uint32MemoTable) CopyValuesSubset(start int, out interface{}) {
  1913  	s.tbl.CopyValuesSubset(start, out.([]uint32))
  1914  }
  1915  
  1916  func (s *Uint32MemoTable) WriteOut(out []byte) {
  1917  	s.tbl.CopyValues(arrow.Uint32Traits.CastFromBytes(out))
  1918  }
  1919  
  1920  func (s *Uint32MemoTable) WriteOutSubset(start int, out []byte) {
  1921  	s.tbl.CopyValuesSubset(start, arrow.Uint32Traits.CastFromBytes(out))
  1922  }
  1923  
  1924  func (s *Uint32MemoTable) WriteOutLE(out []byte) {
  1925  	s.tbl.WriteOut(out)
  1926  }
  1927  
// WriteOutSubsetLE writes the values whose memo index is at least start
// into out through the hash table's WriteOutSubset, which converts each
// value with utils.ToLEUint32.
func (s *Uint32MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}
  1931  
  1932  // Get returns the index of the requested value in the hash table or KeyNotFound
  1933  // along with a boolean indicating if it was found or not.
  1934  func (s *Uint32MemoTable) Get(val interface{}) (int, bool) {
  1935  
  1936  	h := hashInt(uint64(val.(uint32)), 0)
  1937  	if e, ok := s.tbl.Lookup(h, func(v uint32) bool { return val.(uint32) == v }); ok {
  1938  		return int(e.payload.memoIdx), ok
  1939  	}
  1940  	return KeyNotFound, false
  1941  }
  1942  
  1943  // GetOrInsert will return the index of the specified value in the table, or insert the
  1944  // value into the table and return the new index. found indicates whether or not it already
  1945  // existed in the table (true) or was inserted by this call (false).
  1946  func (s *Uint32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  1947  
  1948  	h := hashInt(uint64(val.(uint32)), 0)
  1949  	e, ok := s.tbl.Lookup(h, func(v uint32) bool {
  1950  		return val.(uint32) == v
  1951  	})
  1952  
  1953  	if ok {
  1954  		idx = int(e.payload.memoIdx)
  1955  		found = true
  1956  	} else {
  1957  		idx = s.Size()
  1958  		s.tbl.Insert(e, h, val.(uint32), int32(idx))
  1959  	}
  1960  	return
  1961  }
  1962  
// GetOrInsertBytes is unimplemented for this memo table; calling it
// always panics.
func (s *Uint32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
  1967  
// payloadUint64 is the data carried by one hash table slot: the uint64
// value itself and the memo index recorded for it on insertion.
type payloadUint64 struct {
	val     uint64 // the stored value
	memoIdx int32  // memo index assigned to val on insertion
}
  1972  
// entryUint64 is a single slot of a Uint64HashTable, pairing the stored
// hash with its payload.
type entryUint64 struct {
	h       uint64        // stored hash; sentinel marks an unused slot
	payload payloadUint64 // value and memo index held by this slot
}
  1977  
  1978  func (e entryUint64) Valid() bool { return e.h != sentinel }
  1979  
// Uint64HashTable is a hashtable specifically for uint64 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
type Uint64HashTable struct {
	cap     uint64 // number of slots allocated in entries
	capMask uint64 // cap - 1; masks a hash down to a slot index
	size    uint64 // count of values inserted

	entries []entryUint64 // slot storage
}
  1990  
  1991  // NewUint64HashTable returns a new hash table for uint64 values
  1992  // initialized with the passed in capacity or 32 whichever is larger.
  1993  func NewUint64HashTable(cap uint64) *Uint64HashTable {
  1994  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1995  	ret := &Uint64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  1996  	ret.entries = make([]entryUint64, initCap)
  1997  	return ret
  1998  }
  1999  
  2000  // Reset drops all of the values in this hash table and re-initializes it
  2001  // with the specified initial capacity as if by calling New, but without having
  2002  // to reallocate the object.
  2003  func (h *Uint64HashTable) Reset(cap uint64) {
  2004  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  2005  	h.capMask = h.cap - 1
  2006  	h.size = 0
  2007  	h.entries = make([]entryUint64, h.cap)
  2008  }
  2009  
// CopyValues is used for copying the values out of the hash table into the
// passed in slice, in the order that they were first inserted.
// It is equivalent to CopyValuesSubset with a start of 0.
func (h *Uint64HashTable) CopyValues(out []uint64) {
	h.CopyValuesSubset(0, out)
}
  2015  
  2016  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  2017  // with the value at start, in the order that they were inserted.
  2018  func (h *Uint64HashTable) CopyValuesSubset(start int, out []uint64) {
  2019  	h.VisitEntries(func(e *entryUint64) {
  2020  		idx := e.payload.memoIdx - int32(start)
  2021  		if idx >= 0 {
  2022  			out[idx] = e.payload.val
  2023  		}
  2024  	})
  2025  }
  2026  
// WriteOut writes every value in the table into out, positioned by memo
// index. It is equivalent to WriteOutSubset with a start of 0.
func (h *Uint64HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}
  2030  
  2031  func (h *Uint64HashTable) WriteOutSubset(start int, out []byte) {
  2032  	data := arrow.Uint64Traits.CastFromBytes(out)
  2033  	h.VisitEntries(func(e *entryUint64) {
  2034  		idx := e.payload.memoIdx - int32(start)
  2035  		if idx >= 0 {
  2036  			data[idx] = utils.ToLEUint64(e.payload.val)
  2037  		}
  2038  	})
  2039  }
  2040  
  2041  func (h *Uint64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  2042  
  2043  func (Uint64HashTable) fixHash(v uint64) uint64 {
  2044  	if v == sentinel {
  2045  		return 42
  2046  	}
  2047  	return v
  2048  }
  2049  
  2050  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  2051  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  2052  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  2053  func (h *Uint64HashTable) Lookup(v uint64, cmp func(uint64) bool) (*entryUint64, bool) {
  2054  	idx, ok := h.lookup(v, h.capMask, cmp)
  2055  	return &h.entries[idx], ok
  2056  }
  2057  
  2058  func (h *Uint64HashTable) lookup(v uint64, szMask uint64, cmp func(uint64) bool) (uint64, bool) {
  2059  	const perturbShift uint8 = 5
  2060  
  2061  	var (
  2062  		idx     uint64
  2063  		perturb uint64
  2064  		e       *entryUint64
  2065  	)
  2066  
  2067  	v = h.fixHash(v)
  2068  	idx = v & szMask
  2069  	perturb = (v >> uint64(perturbShift)) + 1
  2070  
  2071  	for {
  2072  		e = &h.entries[idx]
  2073  		if e.h == v && cmp(e.payload.val) {
  2074  			return idx, true
  2075  		}
  2076  
  2077  		if e.h == sentinel {
  2078  			return idx, false
  2079  		}
  2080  
  2081  		// perturbation logic inspired from CPython's set/dict object
  2082  		// the goal is that all 64 bits of unmasked hash value eventually
  2083  		// participate int he probing sequence, to minimize clustering
  2084  		idx = (idx + perturb) & szMask
  2085  		perturb = (perturb >> uint64(perturbShift)) + 1
  2086  	}
  2087  }
  2088  
  2089  func (h *Uint64HashTable) upsize(newcap uint64) error {
  2090  	newMask := newcap - 1
  2091  
  2092  	oldEntries := h.entries
  2093  	h.entries = make([]entryUint64, newcap)
  2094  	for _, e := range oldEntries {
  2095  		if e.Valid() {
  2096  			idx, _ := h.lookup(e.h, newMask, func(uint64) bool { return false })
  2097  			h.entries[idx] = e
  2098  		}
  2099  	}
  2100  	h.cap = newcap
  2101  	h.capMask = newMask
  2102  	return nil
  2103  }
  2104  
  2105  // Insert updates the given entry with the provided hash value, payload value and memo index.
  2106  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  2107  func (h *Uint64HashTable) Insert(e *entryUint64, v uint64, val uint64, memoIdx int32) error {
  2108  	e.h = h.fixHash(v)
  2109  	e.payload.val = val
  2110  	e.payload.memoIdx = memoIdx
  2111  	h.size++
  2112  
  2113  	if h.needUpsize() {
  2114  		h.upsize(h.cap * uint64(loadFactor) * 2)
  2115  	}
  2116  	return nil
  2117  }
  2118  
  2119  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  2120  // a valid entry being one which has had a value inserted into it.
  2121  func (h *Uint64HashTable) VisitEntries(visit func(*entryUint64)) {
  2122  	for _, e := range h.entries {
  2123  		if e.Valid() {
  2124  			visit(&e)
  2125  		}
  2126  	}
  2127  }
  2128  
// Uint64MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Uint64MemoTable struct {
	tbl     *Uint64HashTable // hash table holding the non-null values
	nullIdx int32            // memo index of the null entry, KeyNotFound when absent
}
  2136  
  2137  // NewUint64MemoTable returns a new memotable with num entries pre-allocated to reduce further
  2138  // allocations when inserting.
  2139  func NewUint64MemoTable(num int64) *Uint64MemoTable {
  2140  	return &Uint64MemoTable{tbl: NewUint64HashTable(uint64(num)), nullIdx: KeyNotFound}
  2141  }
  2142  
// TypeTraits returns arrow.Uint64Traits, the traits value for the element
// type stored in this memo table.
func (Uint64MemoTable) TypeTraits() TypeTraits {
	return arrow.Uint64Traits
}
  2146  
  2147  // Reset allows this table to be re-used by dumping all the data currently in the table.
  2148  func (s *Uint64MemoTable) Reset() {
  2149  	s.tbl.Reset(32)
  2150  	s.nullIdx = KeyNotFound
  2151  }
  2152  
  2153  // Size returns the current number of inserted elements into the table including if a null
  2154  // has been inserted.
  2155  func (s *Uint64MemoTable) Size() int {
  2156  	sz := int(s.tbl.size)
  2157  	if _, ok := s.GetNull(); ok {
  2158  		sz++
  2159  	}
  2160  	return sz
  2161  }
  2162  
  2163  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  2164  // that will be true if found and false if not.
  2165  func (s *Uint64MemoTable) GetNull() (int, bool) {
  2166  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  2167  }
  2168  
  2169  // GetOrInsertNull will return the index of the null entry or insert a null entry
  2170  // if one currently doesn't exist. The found value will be true if there was already
  2171  // a null in the table, and false if it inserted one.
  2172  func (s *Uint64MemoTable) GetOrInsertNull() (idx int, found bool) {
  2173  	idx, found = s.GetNull()
  2174  	if !found {
  2175  		idx = s.Size()
  2176  		s.nullIdx = int32(idx)
  2177  	}
  2178  	return
  2179  }
  2180  
  2181  // CopyValues will copy the values from the memo table out into the passed in slice
  2182  // which must be of the appropriate type.
  2183  func (s *Uint64MemoTable) CopyValues(out interface{}) {
  2184  	s.CopyValuesSubset(0, out)
  2185  }
  2186  
  2187  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  2188  // at the provided start index
  2189  func (s *Uint64MemoTable) CopyValuesSubset(start int, out interface{}) {
  2190  	s.tbl.CopyValuesSubset(start, out.([]uint64))
  2191  }
  2192  
  2193  func (s *Uint64MemoTable) WriteOut(out []byte) {
  2194  	s.tbl.CopyValues(arrow.Uint64Traits.CastFromBytes(out))
  2195  }
  2196  
  2197  func (s *Uint64MemoTable) WriteOutSubset(start int, out []byte) {
  2198  	s.tbl.CopyValuesSubset(start, arrow.Uint64Traits.CastFromBytes(out))
  2199  }
  2200  
  2201  func (s *Uint64MemoTable) WriteOutLE(out []byte) {
  2202  	s.tbl.WriteOut(out)
  2203  }
  2204  
// WriteOutSubsetLE writes the values whose memo index is at least start
// into out through the hash table's WriteOutSubset, which converts each
// value with utils.ToLEUint64.
func (s *Uint64MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}
  2208  
  2209  // Get returns the index of the requested value in the hash table or KeyNotFound
  2210  // along with a boolean indicating if it was found or not.
  2211  func (s *Uint64MemoTable) Get(val interface{}) (int, bool) {
  2212  
  2213  	h := hashInt(uint64(val.(uint64)), 0)
  2214  	if e, ok := s.tbl.Lookup(h, func(v uint64) bool { return val.(uint64) == v }); ok {
  2215  		return int(e.payload.memoIdx), ok
  2216  	}
  2217  	return KeyNotFound, false
  2218  }
  2219  
  2220  // GetOrInsert will return the index of the specified value in the table, or insert the
  2221  // value into the table and return the new index. found indicates whether or not it already
  2222  // existed in the table (true) or was inserted by this call (false).
  2223  func (s *Uint64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  2224  
  2225  	h := hashInt(uint64(val.(uint64)), 0)
  2226  	e, ok := s.tbl.Lookup(h, func(v uint64) bool {
  2227  		return val.(uint64) == v
  2228  	})
  2229  
  2230  	if ok {
  2231  		idx = int(e.payload.memoIdx)
  2232  		found = true
  2233  	} else {
  2234  		idx = s.Size()
  2235  		s.tbl.Insert(e, h, val.(uint64), int32(idx))
  2236  	}
  2237  	return
  2238  }
  2239  
// GetOrInsertBytes is unimplemented for this memo table; calling it
// always panics.
func (s *Uint64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
  2244  
// payloadFloat32 is the data carried by one hash table slot: the float32
// value itself and the memo index recorded for it on insertion.
type payloadFloat32 struct {
	val     float32 // the stored value
	memoIdx int32   // memo index assigned to val on insertion
}
  2249  
// entryFloat32 is a single slot of a Float32HashTable, pairing the stored
// hash with its payload.
type entryFloat32 struct {
	h       uint64         // stored hash; sentinel marks an unused slot
	payload payloadFloat32 // value and memo index held by this slot
}
  2254  
  2255  func (e entryFloat32) Valid() bool { return e.h != sentinel }
  2256  
// Float32HashTable is a hashtable specifically for float32 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
type Float32HashTable struct {
	cap     uint64 // number of slots allocated in entries
	capMask uint64 // cap - 1; masks a hash down to a slot index
	size    uint64 // count of values inserted

	entries []entryFloat32 // slot storage
}
  2267  
  2268  // NewFloat32HashTable returns a new hash table for float32 values
  2269  // initialized with the passed in capacity or 32 whichever is larger.
  2270  func NewFloat32HashTable(cap uint64) *Float32HashTable {
  2271  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  2272  	ret := &Float32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  2273  	ret.entries = make([]entryFloat32, initCap)
  2274  	return ret
  2275  }
  2276  
  2277  // Reset drops all of the values in this hash table and re-initializes it
  2278  // with the specified initial capacity as if by calling New, but without having
  2279  // to reallocate the object.
  2280  func (h *Float32HashTable) Reset(cap uint64) {
  2281  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  2282  	h.capMask = h.cap - 1
  2283  	h.size = 0
  2284  	h.entries = make([]entryFloat32, h.cap)
  2285  }
  2286  
// CopyValues is used for copying the values out of the hash table into the
// passed in slice, in the order that they were first inserted.
// It is equivalent to CopyValuesSubset with a start of 0.
func (h *Float32HashTable) CopyValues(out []float32) {
	h.CopyValuesSubset(0, out)
}
  2292  
  2293  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  2294  // with the value at start, in the order that they were inserted.
  2295  func (h *Float32HashTable) CopyValuesSubset(start int, out []float32) {
  2296  	h.VisitEntries(func(e *entryFloat32) {
  2297  		idx := e.payload.memoIdx - int32(start)
  2298  		if idx >= 0 {
  2299  			out[idx] = e.payload.val
  2300  		}
  2301  	})
  2302  }
  2303  
// WriteOut writes every value in the table into out, positioned by memo
// index. It is equivalent to WriteOutSubset with a start of 0.
func (h *Float32HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}
  2307  
  2308  func (h *Float32HashTable) WriteOutSubset(start int, out []byte) {
  2309  	data := arrow.Float32Traits.CastFromBytes(out)
  2310  	h.VisitEntries(func(e *entryFloat32) {
  2311  		idx := e.payload.memoIdx - int32(start)
  2312  		if idx >= 0 {
  2313  			data[idx] = utils.ToLEFloat32(e.payload.val)
  2314  		}
  2315  	})
  2316  }
  2317  
  2318  func (h *Float32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  2319  
  2320  func (Float32HashTable) fixHash(v uint64) uint64 {
  2321  	if v == sentinel {
  2322  		return 42
  2323  	}
  2324  	return v
  2325  }
  2326  
  2327  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  2328  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  2329  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  2330  func (h *Float32HashTable) Lookup(v uint64, cmp func(float32) bool) (*entryFloat32, bool) {
  2331  	idx, ok := h.lookup(v, h.capMask, cmp)
  2332  	return &h.entries[idx], ok
  2333  }
  2334  
  2335  func (h *Float32HashTable) lookup(v uint64, szMask uint64, cmp func(float32) bool) (uint64, bool) {
  2336  	const perturbShift uint8 = 5
  2337  
  2338  	var (
  2339  		idx     uint64
  2340  		perturb uint64
  2341  		e       *entryFloat32
  2342  	)
  2343  
  2344  	v = h.fixHash(v)
  2345  	idx = v & szMask
  2346  	perturb = (v >> uint64(perturbShift)) + 1
  2347  
  2348  	for {
  2349  		e = &h.entries[idx]
  2350  		if e.h == v && cmp(e.payload.val) {
  2351  			return idx, true
  2352  		}
  2353  
  2354  		if e.h == sentinel {
  2355  			return idx, false
  2356  		}
  2357  
  2358  		// perturbation logic inspired from CPython's set/dict object
  2359  		// the goal is that all 64 bits of unmasked hash value eventually
  2360  		// participate int he probing sequence, to minimize clustering
  2361  		idx = (idx + perturb) & szMask
  2362  		perturb = (perturb >> uint64(perturbShift)) + 1
  2363  	}
  2364  }
  2365  
  2366  func (h *Float32HashTable) upsize(newcap uint64) error {
  2367  	newMask := newcap - 1
  2368  
  2369  	oldEntries := h.entries
  2370  	h.entries = make([]entryFloat32, newcap)
  2371  	for _, e := range oldEntries {
  2372  		if e.Valid() {
  2373  			idx, _ := h.lookup(e.h, newMask, func(float32) bool { return false })
  2374  			h.entries[idx] = e
  2375  		}
  2376  	}
  2377  	h.cap = newcap
  2378  	h.capMask = newMask
  2379  	return nil
  2380  }
  2381  
  2382  // Insert updates the given entry with the provided hash value, payload value and memo index.
  2383  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  2384  func (h *Float32HashTable) Insert(e *entryFloat32, v uint64, val float32, memoIdx int32) error {
  2385  	e.h = h.fixHash(v)
  2386  	e.payload.val = val
  2387  	e.payload.memoIdx = memoIdx
  2388  	h.size++
  2389  
  2390  	if h.needUpsize() {
  2391  		h.upsize(h.cap * uint64(loadFactor) * 2)
  2392  	}
  2393  	return nil
  2394  }
  2395  
  2396  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  2397  // a valid entry being one which has had a value inserted into it.
  2398  func (h *Float32HashTable) VisitEntries(visit func(*entryFloat32)) {
  2399  	for _, e := range h.entries {
  2400  		if e.Valid() {
  2401  			visit(&e)
  2402  		}
  2403  	}
  2404  }
  2405  
// Float32MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Float32MemoTable struct {
	tbl     *Float32HashTable // hash table holding the non-null values
	nullIdx int32             // memo index of the null entry, KeyNotFound when absent
}
  2413  
  2414  // NewFloat32MemoTable returns a new memotable with num entries pre-allocated to reduce further
  2415  // allocations when inserting.
  2416  func NewFloat32MemoTable(num int64) *Float32MemoTable {
  2417  	return &Float32MemoTable{tbl: NewFloat32HashTable(uint64(num)), nullIdx: KeyNotFound}
  2418  }
  2419  
// TypeTraits returns arrow.Float32Traits, the traits value for the element
// type stored in this memo table.
func (Float32MemoTable) TypeTraits() TypeTraits {
	return arrow.Float32Traits
}
  2423  
  2424  // Reset allows this table to be re-used by dumping all the data currently in the table.
  2425  func (s *Float32MemoTable) Reset() {
  2426  	s.tbl.Reset(32)
  2427  	s.nullIdx = KeyNotFound
  2428  }
  2429  
  2430  // Size returns the current number of inserted elements into the table including if a null
  2431  // has been inserted.
  2432  func (s *Float32MemoTable) Size() int {
  2433  	sz := int(s.tbl.size)
  2434  	if _, ok := s.GetNull(); ok {
  2435  		sz++
  2436  	}
  2437  	return sz
  2438  }
  2439  
  2440  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  2441  // that will be true if found and false if not.
  2442  func (s *Float32MemoTable) GetNull() (int, bool) {
  2443  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  2444  }
  2445  
  2446  // GetOrInsertNull will return the index of the null entry or insert a null entry
  2447  // if one currently doesn't exist. The found value will be true if there was already
  2448  // a null in the table, and false if it inserted one.
  2449  func (s *Float32MemoTable) GetOrInsertNull() (idx int, found bool) {
  2450  	idx, found = s.GetNull()
  2451  	if !found {
  2452  		idx = s.Size()
  2453  		s.nullIdx = int32(idx)
  2454  	}
  2455  	return
  2456  }
  2457  
  2458  // CopyValues will copy the values from the memo table out into the passed in slice
  2459  // which must be of the appropriate type.
  2460  func (s *Float32MemoTable) CopyValues(out interface{}) {
  2461  	s.CopyValuesSubset(0, out)
  2462  }
  2463  
  2464  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  2465  // at the provided start index
  2466  func (s *Float32MemoTable) CopyValuesSubset(start int, out interface{}) {
  2467  	s.tbl.CopyValuesSubset(start, out.([]float32))
  2468  }
  2469  
  2470  func (s *Float32MemoTable) WriteOut(out []byte) {
  2471  	s.tbl.CopyValues(arrow.Float32Traits.CastFromBytes(out))
  2472  }
  2473  
  2474  func (s *Float32MemoTable) WriteOutSubset(start int, out []byte) {
  2475  	s.tbl.CopyValuesSubset(start, arrow.Float32Traits.CastFromBytes(out))
  2476  }
  2477  
  2478  func (s *Float32MemoTable) WriteOutLE(out []byte) {
  2479  	s.tbl.WriteOut(out)
  2480  }
  2481  
// WriteOutSubsetLE writes the values whose memo index is at least start
// into out through the hash table's WriteOutSubset, which converts each
// value with utils.ToLEFloat32.
func (s *Float32MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}
  2485  
  2486  // Get returns the index of the requested value in the hash table or KeyNotFound
  2487  // along with a boolean indicating if it was found or not.
  2488  func (s *Float32MemoTable) Get(val interface{}) (int, bool) {
  2489  	var cmp func(float32) bool
  2490  
  2491  	if math.IsNaN(float64(val.(float32))) {
  2492  		cmp = isNan32Cmp
  2493  		// use consistent internal bit pattern for NaN regardless of the pattern
  2494  		// that is passed to us. NaN is NaN is NaN
  2495  		val = float32(math.NaN())
  2496  	} else {
  2497  		cmp = func(v float32) bool { return val.(float32) == v }
  2498  	}
  2499  
  2500  	h := hashFloat32(val.(float32), 0)
  2501  	if e, ok := s.tbl.Lookup(h, cmp); ok {
  2502  		return int(e.payload.memoIdx), ok
  2503  	}
  2504  	return KeyNotFound, false
  2505  }
  2506  
  2507  // GetOrInsert will return the index of the specified value in the table, or insert the
  2508  // value into the table and return the new index. found indicates whether or not it already
  2509  // existed in the table (true) or was inserted by this call (false).
  2510  func (s *Float32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  2511  
  2512  	var cmp func(float32) bool
  2513  
  2514  	if math.IsNaN(float64(val.(float32))) {
  2515  		cmp = isNan32Cmp
  2516  		// use consistent internal bit pattern for NaN regardless of the pattern
  2517  		// that is passed to us. NaN is NaN is NaN
  2518  		val = float32(math.NaN())
  2519  	} else {
  2520  		cmp = func(v float32) bool { return val.(float32) == v }
  2521  	}
  2522  
  2523  	h := hashFloat32(val.(float32), 0)
  2524  	e, ok := s.tbl.Lookup(h, cmp)
  2525  
  2526  	if ok {
  2527  		idx = int(e.payload.memoIdx)
  2528  		found = true
  2529  	} else {
  2530  		idx = s.Size()
  2531  		s.tbl.Insert(e, h, val.(float32), int32(idx))
  2532  	}
  2533  	return
  2534  }
  2535  
// GetOrInsertBytes is unimplemented for this memo table; calling it
// always panics.
func (s *Float32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}
  2540  
// payloadFloat64 is the data carried by one hash table slot: the float64
// value itself and a memo index.
type payloadFloat64 struct {
	val     float64 // the stored value
	memoIdx int32   // memo index associated with val
}
  2545  
// entryFloat64 is a single slot of a Float64HashTable: the hash stored for the
// slot plus the value and memo index that belong to it.
type entryFloat64 struct {
	h       uint64         // stored hash; equal to sentinel while the slot is unused
	payload payloadFloat64 // value and memo index held by the slot
}
  2550  
  2551  func (e entryFloat64) Valid() bool { return e.h != sentinel }
  2552  
// Float64HashTable is a hashtable specifically for float64 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
type Float64HashTable struct {
	cap     uint64 // number of slots in entries
	capMask uint64 // cap - 1; hashes are ANDed with it to pick a starting slot
	size    uint64 // number of values inserted so far

	entries []entryFloat64 // slot storage; a slot whose h equals sentinel is empty
}
  2563  
  2564  // NewFloat64HashTable returns a new hash table for float64 values
  2565  // initialized with the passed in capacity or 32 whichever is larger.
  2566  func NewFloat64HashTable(cap uint64) *Float64HashTable {
  2567  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  2568  	ret := &Float64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  2569  	ret.entries = make([]entryFloat64, initCap)
  2570  	return ret
  2571  }
  2572  
  2573  // Reset drops all of the values in this hash table and re-initializes it
  2574  // with the specified initial capacity as if by calling New, but without having
  2575  // to reallocate the object.
  2576  func (h *Float64HashTable) Reset(cap uint64) {
  2577  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  2578  	h.capMask = h.cap - 1
  2579  	h.size = 0
  2580  	h.entries = make([]entryFloat64, h.cap)
  2581  }
  2582  
// CopyValues is used for copying the values out of the hash table into the
// passed in slice, in the order that they were first inserted. It is
// CopyValuesSubset with a start of 0, so each value lands at out[memoIdx] and
// out must be long enough to be indexed by the largest stored memo index.
func (h *Float64HashTable) CopyValues(out []float64) {
	h.CopyValuesSubset(0, out)
}
  2588  
  2589  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  2590  // with the value at start, in the order that they were inserted.
  2591  func (h *Float64HashTable) CopyValuesSubset(start int, out []float64) {
  2592  	h.VisitEntries(func(e *entryFloat64) {
  2593  		idx := e.payload.memoIdx - int32(start)
  2594  		if idx >= 0 {
  2595  			out[idx] = e.payload.val
  2596  		}
  2597  	})
  2598  }
  2599  
// WriteOut writes every value of the table into out. It is WriteOutSubset with
// a start of 0, so out is reinterpreted as float64 storage and each value is
// stored at its memo index after conversion with utils.ToLEFloat64.
func (h *Float64HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}
  2603  
  2604  func (h *Float64HashTable) WriteOutSubset(start int, out []byte) {
  2605  	data := arrow.Float64Traits.CastFromBytes(out)
  2606  	h.VisitEntries(func(e *entryFloat64) {
  2607  		idx := e.payload.memoIdx - int32(start)
  2608  		if idx >= 0 {
  2609  			data[idx] = utils.ToLEFloat64(e.payload.val)
  2610  		}
  2611  	})
  2612  }
  2613  
  2614  func (h *Float64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  2615  
  2616  func (Float64HashTable) fixHash(v uint64) uint64 {
  2617  	if v == sentinel {
  2618  		return 42
  2619  	}
  2620  	return v
  2621  }
  2622  
  2623  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  2624  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  2625  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  2626  func (h *Float64HashTable) Lookup(v uint64, cmp func(float64) bool) (*entryFloat64, bool) {
  2627  	idx, ok := h.lookup(v, h.capMask, cmp)
  2628  	return &h.entries[idx], ok
  2629  }
  2630  
  2631  func (h *Float64HashTable) lookup(v uint64, szMask uint64, cmp func(float64) bool) (uint64, bool) {
  2632  	const perturbShift uint8 = 5
  2633  
  2634  	var (
  2635  		idx     uint64
  2636  		perturb uint64
  2637  		e       *entryFloat64
  2638  	)
  2639  
  2640  	v = h.fixHash(v)
  2641  	idx = v & szMask
  2642  	perturb = (v >> uint64(perturbShift)) + 1
  2643  
  2644  	for {
  2645  		e = &h.entries[idx]
  2646  		if e.h == v && cmp(e.payload.val) {
  2647  			return idx, true
  2648  		}
  2649  
  2650  		if e.h == sentinel {
  2651  			return idx, false
  2652  		}
  2653  
  2654  		// perturbation logic inspired from CPython's set/dict object
  2655  		// the goal is that all 64 bits of unmasked hash value eventually
  2656  		// participate int he probing sequence, to minimize clustering
  2657  		idx = (idx + perturb) & szMask
  2658  		perturb = (perturb >> uint64(perturbShift)) + 1
  2659  	}
  2660  }
  2661  
  2662  func (h *Float64HashTable) upsize(newcap uint64) error {
  2663  	newMask := newcap - 1
  2664  
  2665  	oldEntries := h.entries
  2666  	h.entries = make([]entryFloat64, newcap)
  2667  	for _, e := range oldEntries {
  2668  		if e.Valid() {
  2669  			idx, _ := h.lookup(e.h, newMask, func(float64) bool { return false })
  2670  			h.entries[idx] = e
  2671  		}
  2672  	}
  2673  	h.cap = newcap
  2674  	h.capMask = newMask
  2675  	return nil
  2676  }
  2677  
  2678  // Insert updates the given entry with the provided hash value, payload value and memo index.
  2679  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  2680  func (h *Float64HashTable) Insert(e *entryFloat64, v uint64, val float64, memoIdx int32) error {
  2681  	e.h = h.fixHash(v)
  2682  	e.payload.val = val
  2683  	e.payload.memoIdx = memoIdx
  2684  	h.size++
  2685  
  2686  	if h.needUpsize() {
  2687  		h.upsize(h.cap * uint64(loadFactor) * 2)
  2688  	}
  2689  	return nil
  2690  }
  2691  
  2692  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  2693  // a valid entry being one which has had a value inserted into it.
  2694  func (h *Float64HashTable) VisitEntries(visit func(*entryFloat64)) {
  2695  	for _, e := range h.entries {
  2696  		if e.Valid() {
  2697  			visit(&e)
  2698  		}
  2699  	}
  2700  }
  2701  
// Float64MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Float64MemoTable struct {
	tbl     *Float64HashTable // backing hash table holding the non-null values
	nullIdx int32             // memo index of the null entry, or KeyNotFound when no null was inserted
}
  2709  
  2710  // NewFloat64MemoTable returns a new memotable with num entries pre-allocated to reduce further
  2711  // allocations when inserting.
  2712  func NewFloat64MemoTable(num int64) *Float64MemoTable {
  2713  	return &Float64MemoTable{tbl: NewFloat64HashTable(uint64(num)), nullIdx: KeyNotFound}
  2714  }
  2715  
// TypeTraits returns arrow.Float64Traits, the traits value for the float64
// element type stored in this table.
func (Float64MemoTable) TypeTraits() TypeTraits {
	return arrow.Float64Traits
}
  2719  
  2720  // Reset allows this table to be re-used by dumping all the data currently in the table.
  2721  func (s *Float64MemoTable) Reset() {
  2722  	s.tbl.Reset(32)
  2723  	s.nullIdx = KeyNotFound
  2724  }
  2725  
  2726  // Size returns the current number of inserted elements into the table including if a null
  2727  // has been inserted.
  2728  func (s *Float64MemoTable) Size() int {
  2729  	sz := int(s.tbl.size)
  2730  	if _, ok := s.GetNull(); ok {
  2731  		sz++
  2732  	}
  2733  	return sz
  2734  }
  2735  
  2736  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  2737  // that will be true if found and false if not.
  2738  func (s *Float64MemoTable) GetNull() (int, bool) {
  2739  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  2740  }
  2741  
  2742  // GetOrInsertNull will return the index of the null entry or insert a null entry
  2743  // if one currently doesn't exist. The found value will be true if there was already
  2744  // a null in the table, and false if it inserted one.
  2745  func (s *Float64MemoTable) GetOrInsertNull() (idx int, found bool) {
  2746  	idx, found = s.GetNull()
  2747  	if !found {
  2748  		idx = s.Size()
  2749  		s.nullIdx = int32(idx)
  2750  	}
  2751  	return
  2752  }
  2753  
// CopyValues will copy the values from the memo table out into the passed in slice
// which must be of the appropriate type. It is CopyValuesSubset with a start
// of 0, so out must hold a []float64; any other dynamic type panics on the
// type assertion made there.
func (s *Float64MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}
  2759  
  2760  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  2761  // at the provided start index
  2762  func (s *Float64MemoTable) CopyValuesSubset(start int, out interface{}) {
  2763  	s.tbl.CopyValuesSubset(start, out.([]float64))
  2764  }
  2765  
  2766  func (s *Float64MemoTable) WriteOut(out []byte) {
  2767  	s.tbl.CopyValues(arrow.Float64Traits.CastFromBytes(out))
  2768  }
  2769  
  2770  func (s *Float64MemoTable) WriteOutSubset(start int, out []byte) {
  2771  	s.tbl.CopyValuesSubset(start, arrow.Float64Traits.CastFromBytes(out))
  2772  }
  2773  
// WriteOutLE writes every value of the table into out by forwarding to the
// backing hash table's WriteOut, which converts each value with
// utils.ToLEFloat64 before storing it at its memo index.
func (s *Float64MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}
  2777  
// WriteOutSubsetLE writes the values whose memo index is at least start into
// out by forwarding to the backing hash table's WriteOutSubset, which converts
// each value with utils.ToLEFloat64 before storing it at its memo index minus
// start.
func (s *Float64MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}
  2781  
  2782  // Get returns the index of the requested value in the hash table or KeyNotFound
  2783  // along with a boolean indicating if it was found or not.
  2784  func (s *Float64MemoTable) Get(val interface{}) (int, bool) {
  2785  	var cmp func(float64) bool
  2786  	if math.IsNaN(val.(float64)) {
  2787  		cmp = math.IsNaN
  2788  		// use consistent internal bit pattern for NaN regardless of the pattern
  2789  		// that is passed to us. NaN is NaN is NaN
  2790  		val = math.NaN()
  2791  	} else {
  2792  		cmp = func(v float64) bool { return val.(float64) == v }
  2793  	}
  2794  
  2795  	h := hashFloat64(val.(float64), 0)
  2796  	if e, ok := s.tbl.Lookup(h, cmp); ok {
  2797  		return int(e.payload.memoIdx), ok
  2798  	}
  2799  	return KeyNotFound, false
  2800  }
  2801  
  2802  // GetOrInsert will return the index of the specified value in the table, or insert the
  2803  // value into the table and return the new index. found indicates whether or not it already
  2804  // existed in the table (true) or was inserted by this call (false).
  2805  func (s *Float64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  2806  
  2807  	var cmp func(float64) bool
  2808  	if math.IsNaN(val.(float64)) {
  2809  		cmp = math.IsNaN
  2810  		// use consistent internal bit pattern for NaN regardless of the pattern
  2811  		// that is passed to us. NaN is NaN is NaN
  2812  		val = math.NaN()
  2813  	} else {
  2814  		cmp = func(v float64) bool { return val.(float64) == v }
  2815  	}
  2816  
  2817  	h := hashFloat64(val.(float64), 0)
  2818  	e, ok := s.tbl.Lookup(h, cmp)
  2819  
  2820  	if ok {
  2821  		idx = int(e.payload.memoIdx)
  2822  		found = true
  2823  	} else {
  2824  		idx = s.Size()
  2825  		s.tbl.Insert(e, h, val.(float64), int32(idx))
  2826  	}
  2827  	return
  2828  }
  2829  
// GetOrInsertBytes is unimplemented for Float64MemoTable: it always panics
// with the message "unimplemented" and never examines val or sets any of its
// results.
func (s *Float64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
	panic("unimplemented")
}