github.com/apache/arrow/go/v10@v10.0.1/internal/hashing/xxh3_memo_table.gen.go (about)

     1  // Code generated by xxh3_memo_table.gen.go.tmpl. DO NOT EDIT.
     2  
     3  // Licensed to the Apache Software Foundation (ASF) under one
     4  // or more contributor license agreements.  See the NOTICE file
     5  // distributed with this work for additional information
     6  // regarding copyright ownership.  The ASF licenses this file
     7  // to you under the Apache License, Version 2.0 (the
     8  // "License"); you may not use this file except in compliance
     9  // with the License.  You may obtain a copy of the License at
    10  //
    11  // http://www.apache.org/licenses/LICENSE-2.0
    12  //
    13  // Unless required by applicable law or agreed to in writing, software
    14  // distributed under the License is distributed on an "AS IS" BASIS,
    15  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  // See the License for the specific language governing permissions and
    17  // limitations under the License.
    18  
    19  package hashing
    20  
    21  import (
    22  	"math"
    23  
    24  	"github.com/apache/arrow/go/v10/arrow"
    25  	"github.com/apache/arrow/go/v10/arrow/bitutil"
    26  	"github.com/apache/arrow/go/v10/internal/utils"
    27  )
    28  
    29  type payloadInt8 struct {
    30  	val     int8
    31  	memoIdx int32
    32  }
    33  
    34  type entryInt8 struct {
    35  	h       uint64
    36  	payload payloadInt8
    37  }
    38  
    39  func (e entryInt8) Valid() bool { return e.h != sentinel }
    40  
    41  // Int8HashTable is a hashtable specifically for int8 that
    42  // is utilized with the MemoTable to generalize interactions for easier
    43  // implementation of dictionaries without losing performance.
    44  type Int8HashTable struct {
    45  	cap     uint64
    46  	capMask uint64
    47  	size    uint64
    48  
    49  	entries []entryInt8
    50  }
    51  
    52  // NewInt8HashTable returns a new hash table for int8 values
    53  // initialized with the passed in capacity or 32 whichever is larger.
    54  func NewInt8HashTable(cap uint64) *Int8HashTable {
    55  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
    56  	ret := &Int8HashTable{cap: initCap, capMask: initCap - 1, size: 0}
    57  	ret.entries = make([]entryInt8, initCap)
    58  	return ret
    59  }
    60  
    61  // Reset drops all of the values in this hash table and re-initializes it
    62  // with the specified initial capacity as if by calling New, but without having
    63  // to reallocate the object.
    64  func (h *Int8HashTable) Reset(cap uint64) {
    65  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
    66  	h.capMask = h.cap - 1
    67  	h.size = 0
    68  	h.entries = make([]entryInt8, h.cap)
    69  }
    70  
    71  // CopyValues is used for copying the values out of the hash table into the
    72  // passed in slice, in the order that they were first inserted
    73  func (h *Int8HashTable) CopyValues(out []int8) {
    74  	h.CopyValuesSubset(0, out)
    75  }
    76  
    77  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
    78  // with the value at start, in the order that they were inserted.
    79  func (h *Int8HashTable) CopyValuesSubset(start int, out []int8) {
    80  	h.VisitEntries(func(e *entryInt8) {
    81  		idx := e.payload.memoIdx - int32(start)
    82  		if idx >= 0 {
    83  			out[idx] = e.payload.val
    84  		}
    85  	})
    86  }
    87  
    88  func (h *Int8HashTable) WriteOut(out []byte) {
    89  	h.WriteOutSubset(0, out)
    90  }
    91  
    92  func (h *Int8HashTable) WriteOutSubset(start int, out []byte) {
    93  	data := arrow.Int8Traits.CastFromBytes(out)
    94  	h.VisitEntries(func(e *entryInt8) {
    95  		idx := e.payload.memoIdx - int32(start)
    96  		if idx >= 0 {
    97  			data[idx] = e.payload.val
    98  		}
    99  	})
   100  }
   101  
   102  func (h *Int8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
   103  
   104  func (Int8HashTable) fixHash(v uint64) uint64 {
   105  	if v == sentinel {
   106  		return 42
   107  	}
   108  	return v
   109  }
   110  
   111  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
   112  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
   113  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
   114  func (h *Int8HashTable) Lookup(v uint64, cmp func(int8) bool) (*entryInt8, bool) {
   115  	idx, ok := h.lookup(v, h.capMask, cmp)
   116  	return &h.entries[idx], ok
   117  }
   118  
   119  func (h *Int8HashTable) lookup(v uint64, szMask uint64, cmp func(int8) bool) (uint64, bool) {
   120  	const perturbShift uint8 = 5
   121  
   122  	var (
   123  		idx     uint64
   124  		perturb uint64
   125  		e       *entryInt8
   126  	)
   127  
   128  	v = h.fixHash(v)
   129  	idx = v & szMask
   130  	perturb = (v >> uint64(perturbShift)) + 1
   131  
   132  	for {
   133  		e = &h.entries[idx]
   134  		if e.h == v && cmp(e.payload.val) {
   135  			return idx, true
   136  		}
   137  
   138  		if e.h == sentinel {
   139  			return idx, false
   140  		}
   141  
   142  		// perturbation logic inspired from CPython's set/dict object
   143  		// the goal is that all 64 bits of unmasked hash value eventually
   144  		// participate int he probing sequence, to minimize clustering
   145  		idx = (idx + perturb) & szMask
   146  		perturb = (perturb >> uint64(perturbShift)) + 1
   147  	}
   148  }
   149  
   150  func (h *Int8HashTable) upsize(newcap uint64) error {
   151  	newMask := newcap - 1
   152  
   153  	oldEntries := h.entries
   154  	h.entries = make([]entryInt8, newcap)
   155  	for _, e := range oldEntries {
   156  		if e.Valid() {
   157  			idx, _ := h.lookup(e.h, newMask, func(int8) bool { return false })
   158  			h.entries[idx] = e
   159  		}
   160  	}
   161  	h.cap = newcap
   162  	h.capMask = newMask
   163  	return nil
   164  }
   165  
   166  // Insert updates the given entry with the provided hash value, payload value and memo index.
   167  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
   168  func (h *Int8HashTable) Insert(e *entryInt8, v uint64, val int8, memoIdx int32) error {
   169  	e.h = h.fixHash(v)
   170  	e.payload.val = val
   171  	e.payload.memoIdx = memoIdx
   172  	h.size++
   173  
   174  	if h.needUpsize() {
   175  		h.upsize(h.cap * uint64(loadFactor) * 2)
   176  	}
   177  	return nil
   178  }
   179  
   180  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
   181  // a valid entry being one which has had a value inserted into it.
   182  func (h *Int8HashTable) VisitEntries(visit func(*entryInt8)) {
   183  	for _, e := range h.entries {
   184  		if e.Valid() {
   185  			visit(&e)
   186  		}
   187  	}
   188  }
   189  
   190  // Int8MemoTable is a wrapper over the appropriate hashtable to provide an interface
   191  // conforming to the MemoTable interface defined in the encoding package for general interactions
   192  // regarding dictionaries.
   193  type Int8MemoTable struct {
   194  	tbl     *Int8HashTable
   195  	nullIdx int32
   196  }
   197  
   198  // NewInt8MemoTable returns a new memotable with num entries pre-allocated to reduce further
   199  // allocations when inserting.
   200  func NewInt8MemoTable(num int64) *Int8MemoTable {
   201  	return &Int8MemoTable{tbl: NewInt8HashTable(uint64(num)), nullIdx: KeyNotFound}
   202  }
   203  
   204  func (Int8MemoTable) TypeTraits() TypeTraits {
   205  	return arrow.Int8Traits
   206  }
   207  
   208  // Reset allows this table to be re-used by dumping all the data currently in the table.
   209  func (s *Int8MemoTable) Reset() {
   210  	s.tbl.Reset(32)
   211  	s.nullIdx = KeyNotFound
   212  }
   213  
   214  // Size returns the current number of inserted elements into the table including if a null
   215  // has been inserted.
   216  func (s *Int8MemoTable) Size() int {
   217  	sz := int(s.tbl.size)
   218  	if _, ok := s.GetNull(); ok {
   219  		sz++
   220  	}
   221  	return sz
   222  }
   223  
   224  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
   225  // that will be true if found and false if not.
   226  func (s *Int8MemoTable) GetNull() (int, bool) {
   227  	return int(s.nullIdx), s.nullIdx != KeyNotFound
   228  }
   229  
   230  // GetOrInsertNull will return the index of the null entry or insert a null entry
   231  // if one currently doesn't exist. The found value will be true if there was already
   232  // a null in the table, and false if it inserted one.
   233  func (s *Int8MemoTable) GetOrInsertNull() (idx int, found bool) {
   234  	idx, found = s.GetNull()
   235  	if !found {
   236  		idx = s.Size()
   237  		s.nullIdx = int32(idx)
   238  	}
   239  	return
   240  }
   241  
   242  // CopyValues will copy the values from the memo table out into the passed in slice
   243  // which must be of the appropriate type.
   244  func (s *Int8MemoTable) CopyValues(out interface{}) {
   245  	s.CopyValuesSubset(0, out)
   246  }
   247  
   248  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
   249  // at the provided start index
   250  func (s *Int8MemoTable) CopyValuesSubset(start int, out interface{}) {
   251  	s.tbl.CopyValuesSubset(start, out.([]int8))
   252  }
   253  
   254  func (s *Int8MemoTable) WriteOut(out []byte) {
   255  	s.tbl.CopyValues(arrow.Int8Traits.CastFromBytes(out))
   256  }
   257  
   258  func (s *Int8MemoTable) WriteOutSubset(start int, out []byte) {
   259  	s.tbl.CopyValuesSubset(start, arrow.Int8Traits.CastFromBytes(out))
   260  }
   261  
   262  func (s *Int8MemoTable) WriteOutLE(out []byte) {
   263  	s.tbl.WriteOut(out)
   264  }
   265  
   266  func (s *Int8MemoTable) WriteOutSubsetLE(start int, out []byte) {
   267  	s.tbl.WriteOutSubset(start, out)
   268  }
   269  
   270  // Get returns the index of the requested value in the hash table or KeyNotFound
   271  // along with a boolean indicating if it was found or not.
   272  func (s *Int8MemoTable) Get(val interface{}) (int, bool) {
   273  
   274  	h := hashInt(uint64(val.(int8)), 0)
   275  	if e, ok := s.tbl.Lookup(h, func(v int8) bool { return val.(int8) == v }); ok {
   276  		return int(e.payload.memoIdx), ok
   277  	}
   278  	return KeyNotFound, false
   279  }
   280  
   281  // GetOrInsert will return the index of the specified value in the table, or insert the
   282  // value into the table and return the new index. found indicates whether or not it already
   283  // existed in the table (true) or was inserted by this call (false).
   284  func (s *Int8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
   285  
   286  	h := hashInt(uint64(val.(int8)), 0)
   287  	e, ok := s.tbl.Lookup(h, func(v int8) bool {
   288  		return val.(int8) == v
   289  	})
   290  
   291  	if ok {
   292  		idx = int(e.payload.memoIdx)
   293  		found = true
   294  	} else {
   295  		idx = s.Size()
   296  		s.tbl.Insert(e, h, val.(int8), int32(idx))
   297  	}
   298  	return
   299  }
   300  
   301  type payloadUint8 struct {
   302  	val     uint8
   303  	memoIdx int32
   304  }
   305  
   306  type entryUint8 struct {
   307  	h       uint64
   308  	payload payloadUint8
   309  }
   310  
   311  func (e entryUint8) Valid() bool { return e.h != sentinel }
   312  
   313  // Uint8HashTable is a hashtable specifically for uint8 that
   314  // is utilized with the MemoTable to generalize interactions for easier
   315  // implementation of dictionaries without losing performance.
   316  type Uint8HashTable struct {
   317  	cap     uint64
   318  	capMask uint64
   319  	size    uint64
   320  
   321  	entries []entryUint8
   322  }
   323  
   324  // NewUint8HashTable returns a new hash table for uint8 values
   325  // initialized with the passed in capacity or 32 whichever is larger.
   326  func NewUint8HashTable(cap uint64) *Uint8HashTable {
   327  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   328  	ret := &Uint8HashTable{cap: initCap, capMask: initCap - 1, size: 0}
   329  	ret.entries = make([]entryUint8, initCap)
   330  	return ret
   331  }
   332  
   333  // Reset drops all of the values in this hash table and re-initializes it
   334  // with the specified initial capacity as if by calling New, but without having
   335  // to reallocate the object.
   336  func (h *Uint8HashTable) Reset(cap uint64) {
   337  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   338  	h.capMask = h.cap - 1
   339  	h.size = 0
   340  	h.entries = make([]entryUint8, h.cap)
   341  }
   342  
   343  // CopyValues is used for copying the values out of the hash table into the
   344  // passed in slice, in the order that they were first inserted
   345  func (h *Uint8HashTable) CopyValues(out []uint8) {
   346  	h.CopyValuesSubset(0, out)
   347  }
   348  
   349  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
   350  // with the value at start, in the order that they were inserted.
   351  func (h *Uint8HashTable) CopyValuesSubset(start int, out []uint8) {
   352  	h.VisitEntries(func(e *entryUint8) {
   353  		idx := e.payload.memoIdx - int32(start)
   354  		if idx >= 0 {
   355  			out[idx] = e.payload.val
   356  		}
   357  	})
   358  }
   359  
   360  func (h *Uint8HashTable) WriteOut(out []byte) {
   361  	h.WriteOutSubset(0, out)
   362  }
   363  
   364  func (h *Uint8HashTable) WriteOutSubset(start int, out []byte) {
   365  	data := arrow.Uint8Traits.CastFromBytes(out)
   366  	h.VisitEntries(func(e *entryUint8) {
   367  		idx := e.payload.memoIdx - int32(start)
   368  		if idx >= 0 {
   369  			data[idx] = e.payload.val
   370  		}
   371  	})
   372  }
   373  
   374  func (h *Uint8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
   375  
   376  func (Uint8HashTable) fixHash(v uint64) uint64 {
   377  	if v == sentinel {
   378  		return 42
   379  	}
   380  	return v
   381  }
   382  
   383  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
   384  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
   385  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
   386  func (h *Uint8HashTable) Lookup(v uint64, cmp func(uint8) bool) (*entryUint8, bool) {
   387  	idx, ok := h.lookup(v, h.capMask, cmp)
   388  	return &h.entries[idx], ok
   389  }
   390  
   391  func (h *Uint8HashTable) lookup(v uint64, szMask uint64, cmp func(uint8) bool) (uint64, bool) {
   392  	const perturbShift uint8 = 5
   393  
   394  	var (
   395  		idx     uint64
   396  		perturb uint64
   397  		e       *entryUint8
   398  	)
   399  
   400  	v = h.fixHash(v)
   401  	idx = v & szMask
   402  	perturb = (v >> uint64(perturbShift)) + 1
   403  
   404  	for {
   405  		e = &h.entries[idx]
   406  		if e.h == v && cmp(e.payload.val) {
   407  			return idx, true
   408  		}
   409  
   410  		if e.h == sentinel {
   411  			return idx, false
   412  		}
   413  
   414  		// perturbation logic inspired from CPython's set/dict object
   415  		// the goal is that all 64 bits of unmasked hash value eventually
   416  		// participate int he probing sequence, to minimize clustering
   417  		idx = (idx + perturb) & szMask
   418  		perturb = (perturb >> uint64(perturbShift)) + 1
   419  	}
   420  }
   421  
   422  func (h *Uint8HashTable) upsize(newcap uint64) error {
   423  	newMask := newcap - 1
   424  
   425  	oldEntries := h.entries
   426  	h.entries = make([]entryUint8, newcap)
   427  	for _, e := range oldEntries {
   428  		if e.Valid() {
   429  			idx, _ := h.lookup(e.h, newMask, func(uint8) bool { return false })
   430  			h.entries[idx] = e
   431  		}
   432  	}
   433  	h.cap = newcap
   434  	h.capMask = newMask
   435  	return nil
   436  }
   437  
   438  // Insert updates the given entry with the provided hash value, payload value and memo index.
   439  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
   440  func (h *Uint8HashTable) Insert(e *entryUint8, v uint64, val uint8, memoIdx int32) error {
   441  	e.h = h.fixHash(v)
   442  	e.payload.val = val
   443  	e.payload.memoIdx = memoIdx
   444  	h.size++
   445  
   446  	if h.needUpsize() {
   447  		h.upsize(h.cap * uint64(loadFactor) * 2)
   448  	}
   449  	return nil
   450  }
   451  
   452  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
   453  // a valid entry being one which has had a value inserted into it.
   454  func (h *Uint8HashTable) VisitEntries(visit func(*entryUint8)) {
   455  	for _, e := range h.entries {
   456  		if e.Valid() {
   457  			visit(&e)
   458  		}
   459  	}
   460  }
   461  
   462  // Uint8MemoTable is a wrapper over the appropriate hashtable to provide an interface
   463  // conforming to the MemoTable interface defined in the encoding package for general interactions
   464  // regarding dictionaries.
   465  type Uint8MemoTable struct {
   466  	tbl     *Uint8HashTable
   467  	nullIdx int32
   468  }
   469  
   470  // NewUint8MemoTable returns a new memotable with num entries pre-allocated to reduce further
   471  // allocations when inserting.
   472  func NewUint8MemoTable(num int64) *Uint8MemoTable {
   473  	return &Uint8MemoTable{tbl: NewUint8HashTable(uint64(num)), nullIdx: KeyNotFound}
   474  }
   475  
   476  func (Uint8MemoTable) TypeTraits() TypeTraits {
   477  	return arrow.Uint8Traits
   478  }
   479  
   480  // Reset allows this table to be re-used by dumping all the data currently in the table.
   481  func (s *Uint8MemoTable) Reset() {
   482  	s.tbl.Reset(32)
   483  	s.nullIdx = KeyNotFound
   484  }
   485  
   486  // Size returns the current number of inserted elements into the table including if a null
   487  // has been inserted.
   488  func (s *Uint8MemoTable) Size() int {
   489  	sz := int(s.tbl.size)
   490  	if _, ok := s.GetNull(); ok {
   491  		sz++
   492  	}
   493  	return sz
   494  }
   495  
   496  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
   497  // that will be true if found and false if not.
   498  func (s *Uint8MemoTable) GetNull() (int, bool) {
   499  	return int(s.nullIdx), s.nullIdx != KeyNotFound
   500  }
   501  
   502  // GetOrInsertNull will return the index of the null entry or insert a null entry
   503  // if one currently doesn't exist. The found value will be true if there was already
   504  // a null in the table, and false if it inserted one.
   505  func (s *Uint8MemoTable) GetOrInsertNull() (idx int, found bool) {
   506  	idx, found = s.GetNull()
   507  	if !found {
   508  		idx = s.Size()
   509  		s.nullIdx = int32(idx)
   510  	}
   511  	return
   512  }
   513  
   514  // CopyValues will copy the values from the memo table out into the passed in slice
   515  // which must be of the appropriate type.
   516  func (s *Uint8MemoTable) CopyValues(out interface{}) {
   517  	s.CopyValuesSubset(0, out)
   518  }
   519  
   520  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
   521  // at the provided start index
   522  func (s *Uint8MemoTable) CopyValuesSubset(start int, out interface{}) {
   523  	s.tbl.CopyValuesSubset(start, out.([]uint8))
   524  }
   525  
   526  func (s *Uint8MemoTable) WriteOut(out []byte) {
   527  	s.tbl.CopyValues(arrow.Uint8Traits.CastFromBytes(out))
   528  }
   529  
   530  func (s *Uint8MemoTable) WriteOutSubset(start int, out []byte) {
   531  	s.tbl.CopyValuesSubset(start, arrow.Uint8Traits.CastFromBytes(out))
   532  }
   533  
   534  func (s *Uint8MemoTable) WriteOutLE(out []byte) {
   535  	s.tbl.WriteOut(out)
   536  }
   537  
   538  func (s *Uint8MemoTable) WriteOutSubsetLE(start int, out []byte) {
   539  	s.tbl.WriteOutSubset(start, out)
   540  }
   541  
   542  // Get returns the index of the requested value in the hash table or KeyNotFound
   543  // along with a boolean indicating if it was found or not.
   544  func (s *Uint8MemoTable) Get(val interface{}) (int, bool) {
   545  
   546  	h := hashInt(uint64(val.(uint8)), 0)
   547  	if e, ok := s.tbl.Lookup(h, func(v uint8) bool { return val.(uint8) == v }); ok {
   548  		return int(e.payload.memoIdx), ok
   549  	}
   550  	return KeyNotFound, false
   551  }
   552  
   553  // GetOrInsert will return the index of the specified value in the table, or insert the
   554  // value into the table and return the new index. found indicates whether or not it already
   555  // existed in the table (true) or was inserted by this call (false).
   556  func (s *Uint8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
   557  
   558  	h := hashInt(uint64(val.(uint8)), 0)
   559  	e, ok := s.tbl.Lookup(h, func(v uint8) bool {
   560  		return val.(uint8) == v
   561  	})
   562  
   563  	if ok {
   564  		idx = int(e.payload.memoIdx)
   565  		found = true
   566  	} else {
   567  		idx = s.Size()
   568  		s.tbl.Insert(e, h, val.(uint8), int32(idx))
   569  	}
   570  	return
   571  }
   572  
   573  type payloadInt16 struct {
   574  	val     int16
   575  	memoIdx int32
   576  }
   577  
   578  type entryInt16 struct {
   579  	h       uint64
   580  	payload payloadInt16
   581  }
   582  
   583  func (e entryInt16) Valid() bool { return e.h != sentinel }
   584  
   585  // Int16HashTable is a hashtable specifically for int16 that
   586  // is utilized with the MemoTable to generalize interactions for easier
   587  // implementation of dictionaries without losing performance.
   588  type Int16HashTable struct {
   589  	cap     uint64
   590  	capMask uint64
   591  	size    uint64
   592  
   593  	entries []entryInt16
   594  }
   595  
   596  // NewInt16HashTable returns a new hash table for int16 values
   597  // initialized with the passed in capacity or 32 whichever is larger.
   598  func NewInt16HashTable(cap uint64) *Int16HashTable {
   599  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   600  	ret := &Int16HashTable{cap: initCap, capMask: initCap - 1, size: 0}
   601  	ret.entries = make([]entryInt16, initCap)
   602  	return ret
   603  }
   604  
   605  // Reset drops all of the values in this hash table and re-initializes it
   606  // with the specified initial capacity as if by calling New, but without having
   607  // to reallocate the object.
   608  func (h *Int16HashTable) Reset(cap uint64) {
   609  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   610  	h.capMask = h.cap - 1
   611  	h.size = 0
   612  	h.entries = make([]entryInt16, h.cap)
   613  }
   614  
   615  // CopyValues is used for copying the values out of the hash table into the
   616  // passed in slice, in the order that they were first inserted
   617  func (h *Int16HashTable) CopyValues(out []int16) {
   618  	h.CopyValuesSubset(0, out)
   619  }
   620  
   621  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
   622  // with the value at start, in the order that they were inserted.
   623  func (h *Int16HashTable) CopyValuesSubset(start int, out []int16) {
   624  	h.VisitEntries(func(e *entryInt16) {
   625  		idx := e.payload.memoIdx - int32(start)
   626  		if idx >= 0 {
   627  			out[idx] = e.payload.val
   628  		}
   629  	})
   630  }
   631  
   632  func (h *Int16HashTable) WriteOut(out []byte) {
   633  	h.WriteOutSubset(0, out)
   634  }
   635  
   636  func (h *Int16HashTable) WriteOutSubset(start int, out []byte) {
   637  	data := arrow.Int16Traits.CastFromBytes(out)
   638  	h.VisitEntries(func(e *entryInt16) {
   639  		idx := e.payload.memoIdx - int32(start)
   640  		if idx >= 0 {
   641  			data[idx] = utils.ToLEInt16(e.payload.val)
   642  		}
   643  	})
   644  }
   645  
   646  func (h *Int16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
   647  
   648  func (Int16HashTable) fixHash(v uint64) uint64 {
   649  	if v == sentinel {
   650  		return 42
   651  	}
   652  	return v
   653  }
   654  
   655  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
   656  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
   657  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
   658  func (h *Int16HashTable) Lookup(v uint64, cmp func(int16) bool) (*entryInt16, bool) {
   659  	idx, ok := h.lookup(v, h.capMask, cmp)
   660  	return &h.entries[idx], ok
   661  }
   662  
   663  func (h *Int16HashTable) lookup(v uint64, szMask uint64, cmp func(int16) bool) (uint64, bool) {
   664  	const perturbShift uint8 = 5
   665  
   666  	var (
   667  		idx     uint64
   668  		perturb uint64
   669  		e       *entryInt16
   670  	)
   671  
   672  	v = h.fixHash(v)
   673  	idx = v & szMask
   674  	perturb = (v >> uint64(perturbShift)) + 1
   675  
   676  	for {
   677  		e = &h.entries[idx]
   678  		if e.h == v && cmp(e.payload.val) {
   679  			return idx, true
   680  		}
   681  
   682  		if e.h == sentinel {
   683  			return idx, false
   684  		}
   685  
   686  		// perturbation logic inspired from CPython's set/dict object
   687  		// the goal is that all 64 bits of unmasked hash value eventually
   688  		// participate int he probing sequence, to minimize clustering
   689  		idx = (idx + perturb) & szMask
   690  		perturb = (perturb >> uint64(perturbShift)) + 1
   691  	}
   692  }
   693  
   694  func (h *Int16HashTable) upsize(newcap uint64) error {
   695  	newMask := newcap - 1
   696  
   697  	oldEntries := h.entries
   698  	h.entries = make([]entryInt16, newcap)
   699  	for _, e := range oldEntries {
   700  		if e.Valid() {
   701  			idx, _ := h.lookup(e.h, newMask, func(int16) bool { return false })
   702  			h.entries[idx] = e
   703  		}
   704  	}
   705  	h.cap = newcap
   706  	h.capMask = newMask
   707  	return nil
   708  }
   709  
   710  // Insert updates the given entry with the provided hash value, payload value and memo index.
   711  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
   712  func (h *Int16HashTable) Insert(e *entryInt16, v uint64, val int16, memoIdx int32) error {
   713  	e.h = h.fixHash(v)
   714  	e.payload.val = val
   715  	e.payload.memoIdx = memoIdx
   716  	h.size++
   717  
   718  	if h.needUpsize() {
   719  		h.upsize(h.cap * uint64(loadFactor) * 2)
   720  	}
   721  	return nil
   722  }
   723  
   724  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
   725  // a valid entry being one which has had a value inserted into it.
   726  func (h *Int16HashTable) VisitEntries(visit func(*entryInt16)) {
   727  	for _, e := range h.entries {
   728  		if e.Valid() {
   729  			visit(&e)
   730  		}
   731  	}
   732  }
   733  
   734  // Int16MemoTable is a wrapper over the appropriate hashtable to provide an interface
   735  // conforming to the MemoTable interface defined in the encoding package for general interactions
   736  // regarding dictionaries.
   737  type Int16MemoTable struct {
   738  	tbl     *Int16HashTable
   739  	nullIdx int32
   740  }
   741  
   742  // NewInt16MemoTable returns a new memotable with num entries pre-allocated to reduce further
   743  // allocations when inserting.
   744  func NewInt16MemoTable(num int64) *Int16MemoTable {
   745  	return &Int16MemoTable{tbl: NewInt16HashTable(uint64(num)), nullIdx: KeyNotFound}
   746  }
   747  
   748  func (Int16MemoTable) TypeTraits() TypeTraits {
   749  	return arrow.Int16Traits
   750  }
   751  
   752  // Reset allows this table to be re-used by dumping all the data currently in the table.
   753  func (s *Int16MemoTable) Reset() {
   754  	s.tbl.Reset(32)
   755  	s.nullIdx = KeyNotFound
   756  }
   757  
   758  // Size returns the current number of inserted elements into the table including if a null
   759  // has been inserted.
   760  func (s *Int16MemoTable) Size() int {
   761  	sz := int(s.tbl.size)
   762  	if _, ok := s.GetNull(); ok {
   763  		sz++
   764  	}
   765  	return sz
   766  }
   767  
   768  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
   769  // that will be true if found and false if not.
   770  func (s *Int16MemoTable) GetNull() (int, bool) {
   771  	return int(s.nullIdx), s.nullIdx != KeyNotFound
   772  }
   773  
   774  // GetOrInsertNull will return the index of the null entry or insert a null entry
   775  // if one currently doesn't exist. The found value will be true if there was already
   776  // a null in the table, and false if it inserted one.
   777  func (s *Int16MemoTable) GetOrInsertNull() (idx int, found bool) {
   778  	idx, found = s.GetNull()
   779  	if !found {
   780  		idx = s.Size()
   781  		s.nullIdx = int32(idx)
   782  	}
   783  	return
   784  }
   785  
   786  // CopyValues will copy the values from the memo table out into the passed in slice
   787  // which must be of the appropriate type.
   788  func (s *Int16MemoTable) CopyValues(out interface{}) {
   789  	s.CopyValuesSubset(0, out)
   790  }
   791  
   792  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
   793  // at the provided start index
   794  func (s *Int16MemoTable) CopyValuesSubset(start int, out interface{}) {
   795  	s.tbl.CopyValuesSubset(start, out.([]int16))
   796  }
   797  
   798  func (s *Int16MemoTable) WriteOut(out []byte) {
   799  	s.tbl.CopyValues(arrow.Int16Traits.CastFromBytes(out))
   800  }
   801  
   802  func (s *Int16MemoTable) WriteOutSubset(start int, out []byte) {
   803  	s.tbl.CopyValuesSubset(start, arrow.Int16Traits.CastFromBytes(out))
   804  }
   805  
   806  func (s *Int16MemoTable) WriteOutLE(out []byte) {
   807  	s.tbl.WriteOut(out)
   808  }
   809  
   810  func (s *Int16MemoTable) WriteOutSubsetLE(start int, out []byte) {
   811  	s.tbl.WriteOutSubset(start, out)
   812  }
   813  
   814  // Get returns the index of the requested value in the hash table or KeyNotFound
   815  // along with a boolean indicating if it was found or not.
   816  func (s *Int16MemoTable) Get(val interface{}) (int, bool) {
   817  
   818  	h := hashInt(uint64(val.(int16)), 0)
   819  	if e, ok := s.tbl.Lookup(h, func(v int16) bool { return val.(int16) == v }); ok {
   820  		return int(e.payload.memoIdx), ok
   821  	}
   822  	return KeyNotFound, false
   823  }
   824  
   825  // GetOrInsert will return the index of the specified value in the table, or insert the
   826  // value into the table and return the new index. found indicates whether or not it already
   827  // existed in the table (true) or was inserted by this call (false).
   828  func (s *Int16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
   829  
   830  	h := hashInt(uint64(val.(int16)), 0)
   831  	e, ok := s.tbl.Lookup(h, func(v int16) bool {
   832  		return val.(int16) == v
   833  	})
   834  
   835  	if ok {
   836  		idx = int(e.payload.memoIdx)
   837  		found = true
   838  	} else {
   839  		idx = s.Size()
   840  		s.tbl.Insert(e, h, val.(int16), int32(idx))
   841  	}
   842  	return
   843  }
   844  
   845  type payloadUint16 struct {
   846  	val     uint16
   847  	memoIdx int32
   848  }
   849  
// entryUint16 is a single slot of Uint16HashTable. h holds the slot's hash,
// with the sentinel value marking a slot that has not been filled, and
// payload holds the value/memo-index pair stored in the slot.
type entryUint16 struct {
	h       uint64
	payload payloadUint16
}
   854  
   855  func (e entryUint16) Valid() bool { return e.h != sentinel }
   856  
// Uint16HashTable is a hashtable specifically for uint16 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
//
// cap is the number of slots in entries, capMask is cap-1 and is used to
// reduce a hash to a slot index, and size counts the values inserted so far.
type Uint16HashTable struct {
	cap     uint64
	capMask uint64
	size    uint64

	entries []entryUint16
}
   867  
   868  // NewUint16HashTable returns a new hash table for uint16 values
   869  // initialized with the passed in capacity or 32 whichever is larger.
   870  func NewUint16HashTable(cap uint64) *Uint16HashTable {
   871  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   872  	ret := &Uint16HashTable{cap: initCap, capMask: initCap - 1, size: 0}
   873  	ret.entries = make([]entryUint16, initCap)
   874  	return ret
   875  }
   876  
   877  // Reset drops all of the values in this hash table and re-initializes it
   878  // with the specified initial capacity as if by calling New, but without having
   879  // to reallocate the object.
   880  func (h *Uint16HashTable) Reset(cap uint64) {
   881  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
   882  	h.capMask = h.cap - 1
   883  	h.size = 0
   884  	h.entries = make([]entryUint16, h.cap)
   885  }
   886  
// CopyValues is used for copying the values out of the hash table into the
// passed in slice, in the order that they were first inserted.
// It is equivalent to CopyValuesSubset with a start of 0, so out is indexed
// by each entry's memo index and must be long enough to hold the largest one.
func (h *Uint16HashTable) CopyValues(out []uint16) {
	h.CopyValuesSubset(0, out)
}
   892  
   893  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
   894  // with the value at start, in the order that they were inserted.
   895  func (h *Uint16HashTable) CopyValuesSubset(start int, out []uint16) {
   896  	h.VisitEntries(func(e *entryUint16) {
   897  		idx := e.payload.memoIdx - int32(start)
   898  		if idx >= 0 {
   899  			out[idx] = e.payload.val
   900  		}
   901  	})
   902  }
   903  
// WriteOut is WriteOutSubset with a start of 0: it writes every stored value
// into out.
func (h *Uint16HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}
   907  
   908  func (h *Uint16HashTable) WriteOutSubset(start int, out []byte) {
   909  	data := arrow.Uint16Traits.CastFromBytes(out)
   910  	h.VisitEntries(func(e *entryUint16) {
   911  		idx := e.payload.memoIdx - int32(start)
   912  		if idx >= 0 {
   913  			data[idx] = utils.ToLEUint16(e.payload.val)
   914  		}
   915  	})
   916  }
   917  
   918  func (h *Uint16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
   919  
   920  func (Uint16HashTable) fixHash(v uint64) uint64 {
   921  	if v == sentinel {
   922  		return 42
   923  	}
   924  	return v
   925  }
   926  
   927  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
   928  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
   929  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
   930  func (h *Uint16HashTable) Lookup(v uint64, cmp func(uint16) bool) (*entryUint16, bool) {
   931  	idx, ok := h.lookup(v, h.capMask, cmp)
   932  	return &h.entries[idx], ok
   933  }
   934  
   935  func (h *Uint16HashTable) lookup(v uint64, szMask uint64, cmp func(uint16) bool) (uint64, bool) {
   936  	const perturbShift uint8 = 5
   937  
   938  	var (
   939  		idx     uint64
   940  		perturb uint64
   941  		e       *entryUint16
   942  	)
   943  
   944  	v = h.fixHash(v)
   945  	idx = v & szMask
   946  	perturb = (v >> uint64(perturbShift)) + 1
   947  
   948  	for {
   949  		e = &h.entries[idx]
   950  		if e.h == v && cmp(e.payload.val) {
   951  			return idx, true
   952  		}
   953  
   954  		if e.h == sentinel {
   955  			return idx, false
   956  		}
   957  
   958  		// perturbation logic inspired from CPython's set/dict object
   959  		// the goal is that all 64 bits of unmasked hash value eventually
   960  		// participate int he probing sequence, to minimize clustering
   961  		idx = (idx + perturb) & szMask
   962  		perturb = (perturb >> uint64(perturbShift)) + 1
   963  	}
   964  }
   965  
   966  func (h *Uint16HashTable) upsize(newcap uint64) error {
   967  	newMask := newcap - 1
   968  
   969  	oldEntries := h.entries
   970  	h.entries = make([]entryUint16, newcap)
   971  	for _, e := range oldEntries {
   972  		if e.Valid() {
   973  			idx, _ := h.lookup(e.h, newMask, func(uint16) bool { return false })
   974  			h.entries[idx] = e
   975  		}
   976  	}
   977  	h.cap = newcap
   978  	h.capMask = newMask
   979  	return nil
   980  }
   981  
   982  // Insert updates the given entry with the provided hash value, payload value and memo index.
   983  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
   984  func (h *Uint16HashTable) Insert(e *entryUint16, v uint64, val uint16, memoIdx int32) error {
   985  	e.h = h.fixHash(v)
   986  	e.payload.val = val
   987  	e.payload.memoIdx = memoIdx
   988  	h.size++
   989  
   990  	if h.needUpsize() {
   991  		h.upsize(h.cap * uint64(loadFactor) * 2)
   992  	}
   993  	return nil
   994  }
   995  
   996  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
   997  // a valid entry being one which has had a value inserted into it.
   998  func (h *Uint16HashTable) VisitEntries(visit func(*entryUint16)) {
   999  	for _, e := range h.entries {
  1000  		if e.Valid() {
  1001  			visit(&e)
  1002  		}
  1003  	}
  1004  }
  1005  
// Uint16MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
//
// tbl stores the values; nullIdx is the memo index assigned to null, or
// KeyNotFound while no null has been inserted.
type Uint16MemoTable struct {
	tbl     *Uint16HashTable
	nullIdx int32
}
  1013  
  1014  // NewUint16MemoTable returns a new memotable with num entries pre-allocated to reduce further
  1015  // allocations when inserting.
  1016  func NewUint16MemoTable(num int64) *Uint16MemoTable {
  1017  	return &Uint16MemoTable{tbl: NewUint16HashTable(uint64(num)), nullIdx: KeyNotFound}
  1018  }
  1019  
// TypeTraits returns arrow.Uint16Traits, the traits value for the uint16
// values held by this table.
func (Uint16MemoTable) TypeTraits() TypeTraits {
	return arrow.Uint16Traits
}
  1023  
// Reset allows this table to be re-used by dumping all the data currently in the table.
// The hash table is reset with a requested capacity of 32 and any recorded
// null index is forgotten.
func (s *Uint16MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}
  1029  
  1030  // Size returns the current number of inserted elements into the table including if a null
  1031  // has been inserted.
  1032  func (s *Uint16MemoTable) Size() int {
  1033  	sz := int(s.tbl.size)
  1034  	if _, ok := s.GetNull(); ok {
  1035  		sz++
  1036  	}
  1037  	return sz
  1038  }
  1039  
  1040  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  1041  // that will be true if found and false if not.
  1042  func (s *Uint16MemoTable) GetNull() (int, bool) {
  1043  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  1044  }
  1045  
  1046  // GetOrInsertNull will return the index of the null entry or insert a null entry
  1047  // if one currently doesn't exist. The found value will be true if there was already
  1048  // a null in the table, and false if it inserted one.
  1049  func (s *Uint16MemoTable) GetOrInsertNull() (idx int, found bool) {
  1050  	idx, found = s.GetNull()
  1051  	if !found {
  1052  		idx = s.Size()
  1053  		s.nullIdx = int32(idx)
  1054  	}
  1055  	return
  1056  }
  1057  
// CopyValues will copy the values from the memo table out into the passed in slice
// which must be of the appropriate type.
// It is CopyValuesSubset with a start of 0, so out must hold a []uint16.
func (s *Uint16MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}
  1063  
  1064  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  1065  // at the provided start index
  1066  func (s *Uint16MemoTable) CopyValuesSubset(start int, out interface{}) {
  1067  	s.tbl.CopyValuesSubset(start, out.([]uint16))
  1068  }
  1069  
  1070  func (s *Uint16MemoTable) WriteOut(out []byte) {
  1071  	s.tbl.CopyValues(arrow.Uint16Traits.CastFromBytes(out))
  1072  }
  1073  
  1074  func (s *Uint16MemoTable) WriteOutSubset(start int, out []byte) {
  1075  	s.tbl.CopyValuesSubset(start, arrow.Uint16Traits.CastFromBytes(out))
  1076  }
  1077  
// WriteOutLE writes every stored value into out through the hash table's
// WriteOut, which stores each value after converting it with
// utils.ToLEUint16.
func (s *Uint16MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}
  1081  
// WriteOutSubsetLE writes the stored values whose memo index is at least
// start into out through the hash table's WriteOutSubset, which stores each
// value after converting it with utils.ToLEUint16.
func (s *Uint16MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}
  1085  
  1086  // Get returns the index of the requested value in the hash table or KeyNotFound
  1087  // along with a boolean indicating if it was found or not.
  1088  func (s *Uint16MemoTable) Get(val interface{}) (int, bool) {
  1089  
  1090  	h := hashInt(uint64(val.(uint16)), 0)
  1091  	if e, ok := s.tbl.Lookup(h, func(v uint16) bool { return val.(uint16) == v }); ok {
  1092  		return int(e.payload.memoIdx), ok
  1093  	}
  1094  	return KeyNotFound, false
  1095  }
  1096  
  1097  // GetOrInsert will return the index of the specified value in the table, or insert the
  1098  // value into the table and return the new index. found indicates whether or not it already
  1099  // existed in the table (true) or was inserted by this call (false).
  1100  func (s *Uint16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  1101  
  1102  	h := hashInt(uint64(val.(uint16)), 0)
  1103  	e, ok := s.tbl.Lookup(h, func(v uint16) bool {
  1104  		return val.(uint16) == v
  1105  	})
  1106  
  1107  	if ok {
  1108  		idx = int(e.payload.memoIdx)
  1109  		found = true
  1110  	} else {
  1111  		idx = s.Size()
  1112  		s.tbl.Insert(e, h, val.(uint16), int32(idx))
  1113  	}
  1114  	return
  1115  }
  1116  
// payloadInt32 is what an occupied slot stores: val is the int32 value and
// memoIdx is the index recorded for it when it was inserted.
type payloadInt32 struct {
	val     int32
	memoIdx int32
}
  1121  
// entryInt32 is a single slot of Int32HashTable. h holds the slot's hash,
// with the sentinel value marking a slot that has not been filled, and
// payload holds the value/memo-index pair stored in the slot.
type entryInt32 struct {
	h       uint64
	payload payloadInt32
}
  1126  
  1127  func (e entryInt32) Valid() bool { return e.h != sentinel }
  1128  
// Int32HashTable is a hashtable specifically for int32 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
//
// cap is the number of slots in entries, capMask is cap-1 and is used to
// reduce a hash to a slot index, and size counts the values inserted so far.
type Int32HashTable struct {
	cap     uint64
	capMask uint64
	size    uint64

	entries []entryInt32
}
  1139  
  1140  // NewInt32HashTable returns a new hash table for int32 values
  1141  // initialized with the passed in capacity or 32 whichever is larger.
  1142  func NewInt32HashTable(cap uint64) *Int32HashTable {
  1143  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1144  	ret := &Int32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  1145  	ret.entries = make([]entryInt32, initCap)
  1146  	return ret
  1147  }
  1148  
  1149  // Reset drops all of the values in this hash table and re-initializes it
  1150  // with the specified initial capacity as if by calling New, but without having
  1151  // to reallocate the object.
  1152  func (h *Int32HashTable) Reset(cap uint64) {
  1153  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1154  	h.capMask = h.cap - 1
  1155  	h.size = 0
  1156  	h.entries = make([]entryInt32, h.cap)
  1157  }
  1158  
// CopyValues is used for copying the values out of the hash table into the
// passed in slice, in the order that they were first inserted.
// It is equivalent to CopyValuesSubset with a start of 0, so out is indexed
// by each entry's memo index and must be long enough to hold the largest one.
func (h *Int32HashTable) CopyValues(out []int32) {
	h.CopyValuesSubset(0, out)
}
  1164  
  1165  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  1166  // with the value at start, in the order that they were inserted.
  1167  func (h *Int32HashTable) CopyValuesSubset(start int, out []int32) {
  1168  	h.VisitEntries(func(e *entryInt32) {
  1169  		idx := e.payload.memoIdx - int32(start)
  1170  		if idx >= 0 {
  1171  			out[idx] = e.payload.val
  1172  		}
  1173  	})
  1174  }
  1175  
// WriteOut is WriteOutSubset with a start of 0: it writes every stored value
// into out.
func (h *Int32HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}
  1179  
  1180  func (h *Int32HashTable) WriteOutSubset(start int, out []byte) {
  1181  	data := arrow.Int32Traits.CastFromBytes(out)
  1182  	h.VisitEntries(func(e *entryInt32) {
  1183  		idx := e.payload.memoIdx - int32(start)
  1184  		if idx >= 0 {
  1185  			data[idx] = utils.ToLEInt32(e.payload.val)
  1186  		}
  1187  	})
  1188  }
  1189  
  1190  func (h *Int32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  1191  
  1192  func (Int32HashTable) fixHash(v uint64) uint64 {
  1193  	if v == sentinel {
  1194  		return 42
  1195  	}
  1196  	return v
  1197  }
  1198  
  1199  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  1200  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  1201  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  1202  func (h *Int32HashTable) Lookup(v uint64, cmp func(int32) bool) (*entryInt32, bool) {
  1203  	idx, ok := h.lookup(v, h.capMask, cmp)
  1204  	return &h.entries[idx], ok
  1205  }
  1206  
  1207  func (h *Int32HashTable) lookup(v uint64, szMask uint64, cmp func(int32) bool) (uint64, bool) {
  1208  	const perturbShift uint8 = 5
  1209  
  1210  	var (
  1211  		idx     uint64
  1212  		perturb uint64
  1213  		e       *entryInt32
  1214  	)
  1215  
  1216  	v = h.fixHash(v)
  1217  	idx = v & szMask
  1218  	perturb = (v >> uint64(perturbShift)) + 1
  1219  
  1220  	for {
  1221  		e = &h.entries[idx]
  1222  		if e.h == v && cmp(e.payload.val) {
  1223  			return idx, true
  1224  		}
  1225  
  1226  		if e.h == sentinel {
  1227  			return idx, false
  1228  		}
  1229  
  1230  		// perturbation logic inspired from CPython's set/dict object
  1231  		// the goal is that all 64 bits of unmasked hash value eventually
  1232  		// participate int he probing sequence, to minimize clustering
  1233  		idx = (idx + perturb) & szMask
  1234  		perturb = (perturb >> uint64(perturbShift)) + 1
  1235  	}
  1236  }
  1237  
  1238  func (h *Int32HashTable) upsize(newcap uint64) error {
  1239  	newMask := newcap - 1
  1240  
  1241  	oldEntries := h.entries
  1242  	h.entries = make([]entryInt32, newcap)
  1243  	for _, e := range oldEntries {
  1244  		if e.Valid() {
  1245  			idx, _ := h.lookup(e.h, newMask, func(int32) bool { return false })
  1246  			h.entries[idx] = e
  1247  		}
  1248  	}
  1249  	h.cap = newcap
  1250  	h.capMask = newMask
  1251  	return nil
  1252  }
  1253  
  1254  // Insert updates the given entry with the provided hash value, payload value and memo index.
  1255  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  1256  func (h *Int32HashTable) Insert(e *entryInt32, v uint64, val int32, memoIdx int32) error {
  1257  	e.h = h.fixHash(v)
  1258  	e.payload.val = val
  1259  	e.payload.memoIdx = memoIdx
  1260  	h.size++
  1261  
  1262  	if h.needUpsize() {
  1263  		h.upsize(h.cap * uint64(loadFactor) * 2)
  1264  	}
  1265  	return nil
  1266  }
  1267  
  1268  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  1269  // a valid entry being one which has had a value inserted into it.
  1270  func (h *Int32HashTable) VisitEntries(visit func(*entryInt32)) {
  1271  	for _, e := range h.entries {
  1272  		if e.Valid() {
  1273  			visit(&e)
  1274  		}
  1275  	}
  1276  }
  1277  
// Int32MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
//
// tbl stores the values; nullIdx is the memo index assigned to null, or
// KeyNotFound while no null has been inserted.
type Int32MemoTable struct {
	tbl     *Int32HashTable
	nullIdx int32
}
  1285  
  1286  // NewInt32MemoTable returns a new memotable with num entries pre-allocated to reduce further
  1287  // allocations when inserting.
  1288  func NewInt32MemoTable(num int64) *Int32MemoTable {
  1289  	return &Int32MemoTable{tbl: NewInt32HashTable(uint64(num)), nullIdx: KeyNotFound}
  1290  }
  1291  
// TypeTraits returns arrow.Int32Traits, the traits value for the int32
// values held by this table.
func (Int32MemoTable) TypeTraits() TypeTraits {
	return arrow.Int32Traits
}
  1295  
// Reset allows this table to be re-used by dumping all the data currently in the table.
// The hash table is reset with a requested capacity of 32 and any recorded
// null index is forgotten.
func (s *Int32MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}
  1301  
  1302  // Size returns the current number of inserted elements into the table including if a null
  1303  // has been inserted.
  1304  func (s *Int32MemoTable) Size() int {
  1305  	sz := int(s.tbl.size)
  1306  	if _, ok := s.GetNull(); ok {
  1307  		sz++
  1308  	}
  1309  	return sz
  1310  }
  1311  
  1312  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  1313  // that will be true if found and false if not.
  1314  func (s *Int32MemoTable) GetNull() (int, bool) {
  1315  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  1316  }
  1317  
  1318  // GetOrInsertNull will return the index of the null entry or insert a null entry
  1319  // if one currently doesn't exist. The found value will be true if there was already
  1320  // a null in the table, and false if it inserted one.
  1321  func (s *Int32MemoTable) GetOrInsertNull() (idx int, found bool) {
  1322  	idx, found = s.GetNull()
  1323  	if !found {
  1324  		idx = s.Size()
  1325  		s.nullIdx = int32(idx)
  1326  	}
  1327  	return
  1328  }
  1329  
// CopyValues will copy the values from the memo table out into the passed in slice
// which must be of the appropriate type.
// It is CopyValuesSubset with a start of 0, so out must hold a []int32.
func (s *Int32MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}
  1335  
  1336  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  1337  // at the provided start index
  1338  func (s *Int32MemoTable) CopyValuesSubset(start int, out interface{}) {
  1339  	s.tbl.CopyValuesSubset(start, out.([]int32))
  1340  }
  1341  
  1342  func (s *Int32MemoTable) WriteOut(out []byte) {
  1343  	s.tbl.CopyValues(arrow.Int32Traits.CastFromBytes(out))
  1344  }
  1345  
  1346  func (s *Int32MemoTable) WriteOutSubset(start int, out []byte) {
  1347  	s.tbl.CopyValuesSubset(start, arrow.Int32Traits.CastFromBytes(out))
  1348  }
  1349  
// WriteOutLE writes every stored value into out through the hash table's
// WriteOut, which stores each value after converting it with
// utils.ToLEInt32.
func (s *Int32MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}
  1353  
// WriteOutSubsetLE writes the stored values whose memo index is at least
// start into out through the hash table's WriteOutSubset, which stores each
// value after converting it with utils.ToLEInt32.
func (s *Int32MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}
  1357  
  1358  // Get returns the index of the requested value in the hash table or KeyNotFound
  1359  // along with a boolean indicating if it was found or not.
  1360  func (s *Int32MemoTable) Get(val interface{}) (int, bool) {
  1361  
  1362  	h := hashInt(uint64(val.(int32)), 0)
  1363  	if e, ok := s.tbl.Lookup(h, func(v int32) bool { return val.(int32) == v }); ok {
  1364  		return int(e.payload.memoIdx), ok
  1365  	}
  1366  	return KeyNotFound, false
  1367  }
  1368  
  1369  // GetOrInsert will return the index of the specified value in the table, or insert the
  1370  // value into the table and return the new index. found indicates whether or not it already
  1371  // existed in the table (true) or was inserted by this call (false).
  1372  func (s *Int32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  1373  
  1374  	h := hashInt(uint64(val.(int32)), 0)
  1375  	e, ok := s.tbl.Lookup(h, func(v int32) bool {
  1376  		return val.(int32) == v
  1377  	})
  1378  
  1379  	if ok {
  1380  		idx = int(e.payload.memoIdx)
  1381  		found = true
  1382  	} else {
  1383  		idx = s.Size()
  1384  		s.tbl.Insert(e, h, val.(int32), int32(idx))
  1385  	}
  1386  	return
  1387  }
  1388  
// payloadInt64 is what an occupied slot stores: val is the int64 value and
// memoIdx is the index recorded for it when it was inserted.
type payloadInt64 struct {
	val     int64
	memoIdx int32
}
  1393  
// entryInt64 is a single slot of Int64HashTable. h holds the slot's hash,
// with the sentinel value marking a slot that has not been filled, and
// payload holds the value/memo-index pair stored in the slot.
type entryInt64 struct {
	h       uint64
	payload payloadInt64
}
  1398  
  1399  func (e entryInt64) Valid() bool { return e.h != sentinel }
  1400  
// Int64HashTable is a hashtable specifically for int64 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
//
// cap is the number of slots in entries, capMask is cap-1 and is used to
// reduce a hash to a slot index, and size counts the values inserted so far.
type Int64HashTable struct {
	cap     uint64
	capMask uint64
	size    uint64

	entries []entryInt64
}
  1411  
  1412  // NewInt64HashTable returns a new hash table for int64 values
  1413  // initialized with the passed in capacity or 32 whichever is larger.
  1414  func NewInt64HashTable(cap uint64) *Int64HashTable {
  1415  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1416  	ret := &Int64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  1417  	ret.entries = make([]entryInt64, initCap)
  1418  	return ret
  1419  }
  1420  
  1421  // Reset drops all of the values in this hash table and re-initializes it
  1422  // with the specified initial capacity as if by calling New, but without having
  1423  // to reallocate the object.
  1424  func (h *Int64HashTable) Reset(cap uint64) {
  1425  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1426  	h.capMask = h.cap - 1
  1427  	h.size = 0
  1428  	h.entries = make([]entryInt64, h.cap)
  1429  }
  1430  
// CopyValues is used for copying the values out of the hash table into the
// passed in slice, in the order that they were first inserted.
// It is equivalent to CopyValuesSubset with a start of 0, so out is indexed
// by each entry's memo index and must be long enough to hold the largest one.
func (h *Int64HashTable) CopyValues(out []int64) {
	h.CopyValuesSubset(0, out)
}
  1436  
  1437  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  1438  // with the value at start, in the order that they were inserted.
  1439  func (h *Int64HashTable) CopyValuesSubset(start int, out []int64) {
  1440  	h.VisitEntries(func(e *entryInt64) {
  1441  		idx := e.payload.memoIdx - int32(start)
  1442  		if idx >= 0 {
  1443  			out[idx] = e.payload.val
  1444  		}
  1445  	})
  1446  }
  1447  
// WriteOut is WriteOutSubset with a start of 0: it writes every stored value
// into out.
func (h *Int64HashTable) WriteOut(out []byte) {
	h.WriteOutSubset(0, out)
}
  1451  
  1452  func (h *Int64HashTable) WriteOutSubset(start int, out []byte) {
  1453  	data := arrow.Int64Traits.CastFromBytes(out)
  1454  	h.VisitEntries(func(e *entryInt64) {
  1455  		idx := e.payload.memoIdx - int32(start)
  1456  		if idx >= 0 {
  1457  			data[idx] = utils.ToLEInt64(e.payload.val)
  1458  		}
  1459  	})
  1460  }
  1461  
  1462  func (h *Int64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  1463  
  1464  func (Int64HashTable) fixHash(v uint64) uint64 {
  1465  	if v == sentinel {
  1466  		return 42
  1467  	}
  1468  	return v
  1469  }
  1470  
  1471  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  1472  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  1473  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  1474  func (h *Int64HashTable) Lookup(v uint64, cmp func(int64) bool) (*entryInt64, bool) {
  1475  	idx, ok := h.lookup(v, h.capMask, cmp)
  1476  	return &h.entries[idx], ok
  1477  }
  1478  
  1479  func (h *Int64HashTable) lookup(v uint64, szMask uint64, cmp func(int64) bool) (uint64, bool) {
  1480  	const perturbShift uint8 = 5
  1481  
  1482  	var (
  1483  		idx     uint64
  1484  		perturb uint64
  1485  		e       *entryInt64
  1486  	)
  1487  
  1488  	v = h.fixHash(v)
  1489  	idx = v & szMask
  1490  	perturb = (v >> uint64(perturbShift)) + 1
  1491  
  1492  	for {
  1493  		e = &h.entries[idx]
  1494  		if e.h == v && cmp(e.payload.val) {
  1495  			return idx, true
  1496  		}
  1497  
  1498  		if e.h == sentinel {
  1499  			return idx, false
  1500  		}
  1501  
  1502  		// perturbation logic inspired from CPython's set/dict object
  1503  		// the goal is that all 64 bits of unmasked hash value eventually
  1504  		// participate int he probing sequence, to minimize clustering
  1505  		idx = (idx + perturb) & szMask
  1506  		perturb = (perturb >> uint64(perturbShift)) + 1
  1507  	}
  1508  }
  1509  
  1510  func (h *Int64HashTable) upsize(newcap uint64) error {
  1511  	newMask := newcap - 1
  1512  
  1513  	oldEntries := h.entries
  1514  	h.entries = make([]entryInt64, newcap)
  1515  	for _, e := range oldEntries {
  1516  		if e.Valid() {
  1517  			idx, _ := h.lookup(e.h, newMask, func(int64) bool { return false })
  1518  			h.entries[idx] = e
  1519  		}
  1520  	}
  1521  	h.cap = newcap
  1522  	h.capMask = newMask
  1523  	return nil
  1524  }
  1525  
  1526  // Insert updates the given entry with the provided hash value, payload value and memo index.
  1527  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  1528  func (h *Int64HashTable) Insert(e *entryInt64, v uint64, val int64, memoIdx int32) error {
  1529  	e.h = h.fixHash(v)
  1530  	e.payload.val = val
  1531  	e.payload.memoIdx = memoIdx
  1532  	h.size++
  1533  
  1534  	if h.needUpsize() {
  1535  		h.upsize(h.cap * uint64(loadFactor) * 2)
  1536  	}
  1537  	return nil
  1538  }
  1539  
  1540  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  1541  // a valid entry being one which has had a value inserted into it.
  1542  func (h *Int64HashTable) VisitEntries(visit func(*entryInt64)) {
  1543  	for _, e := range h.entries {
  1544  		if e.Valid() {
  1545  			visit(&e)
  1546  		}
  1547  	}
  1548  }
  1549  
// Int64MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
//
// tbl stores the values; nullIdx is the memo index assigned to null, or
// KeyNotFound while no null has been inserted.
type Int64MemoTable struct {
	tbl     *Int64HashTable
	nullIdx int32
}
  1557  
  1558  // NewInt64MemoTable returns a new memotable with num entries pre-allocated to reduce further
  1559  // allocations when inserting.
  1560  func NewInt64MemoTable(num int64) *Int64MemoTable {
  1561  	return &Int64MemoTable{tbl: NewInt64HashTable(uint64(num)), nullIdx: KeyNotFound}
  1562  }
  1563  
// TypeTraits returns arrow.Int64Traits, the traits value for the int64
// values held by this table.
func (Int64MemoTable) TypeTraits() TypeTraits {
	return arrow.Int64Traits
}
  1567  
// Reset allows this table to be re-used by dumping all the data currently in the table.
// The hash table is reset with a requested capacity of 32 and any recorded
// null index is forgotten.
func (s *Int64MemoTable) Reset() {
	s.tbl.Reset(32)
	s.nullIdx = KeyNotFound
}
  1573  
  1574  // Size returns the current number of inserted elements into the table including if a null
  1575  // has been inserted.
  1576  func (s *Int64MemoTable) Size() int {
  1577  	sz := int(s.tbl.size)
  1578  	if _, ok := s.GetNull(); ok {
  1579  		sz++
  1580  	}
  1581  	return sz
  1582  }
  1583  
  1584  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  1585  // that will be true if found and false if not.
  1586  func (s *Int64MemoTable) GetNull() (int, bool) {
  1587  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  1588  }
  1589  
  1590  // GetOrInsertNull will return the index of the null entry or insert a null entry
  1591  // if one currently doesn't exist. The found value will be true if there was already
  1592  // a null in the table, and false if it inserted one.
  1593  func (s *Int64MemoTable) GetOrInsertNull() (idx int, found bool) {
  1594  	idx, found = s.GetNull()
  1595  	if !found {
  1596  		idx = s.Size()
  1597  		s.nullIdx = int32(idx)
  1598  	}
  1599  	return
  1600  }
  1601  
// CopyValues will copy the values from the memo table out into the passed in slice
// which must be of the appropriate type.
// It is CopyValuesSubset with a start of 0, so out must hold a []int64.
func (s *Int64MemoTable) CopyValues(out interface{}) {
	s.CopyValuesSubset(0, out)
}
  1607  
  1608  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  1609  // at the provided start index
  1610  func (s *Int64MemoTable) CopyValuesSubset(start int, out interface{}) {
  1611  	s.tbl.CopyValuesSubset(start, out.([]int64))
  1612  }
  1613  
  1614  func (s *Int64MemoTable) WriteOut(out []byte) {
  1615  	s.tbl.CopyValues(arrow.Int64Traits.CastFromBytes(out))
  1616  }
  1617  
  1618  func (s *Int64MemoTable) WriteOutSubset(start int, out []byte) {
  1619  	s.tbl.CopyValuesSubset(start, arrow.Int64Traits.CastFromBytes(out))
  1620  }
  1621  
// WriteOutLE writes every stored value into out through the hash table's
// WriteOut, which stores each value after converting it with
// utils.ToLEInt64.
func (s *Int64MemoTable) WriteOutLE(out []byte) {
	s.tbl.WriteOut(out)
}
  1625  
// WriteOutSubsetLE writes the stored values whose memo index is at least
// start into out through the hash table's WriteOutSubset, which stores each
// value after converting it with utils.ToLEInt64.
func (s *Int64MemoTable) WriteOutSubsetLE(start int, out []byte) {
	s.tbl.WriteOutSubset(start, out)
}
  1629  
  1630  // Get returns the index of the requested value in the hash table or KeyNotFound
  1631  // along with a boolean indicating if it was found or not.
  1632  func (s *Int64MemoTable) Get(val interface{}) (int, bool) {
  1633  
  1634  	h := hashInt(uint64(val.(int64)), 0)
  1635  	if e, ok := s.tbl.Lookup(h, func(v int64) bool { return val.(int64) == v }); ok {
  1636  		return int(e.payload.memoIdx), ok
  1637  	}
  1638  	return KeyNotFound, false
  1639  }
  1640  
  1641  // GetOrInsert will return the index of the specified value in the table, or insert the
  1642  // value into the table and return the new index. found indicates whether or not it already
  1643  // existed in the table (true) or was inserted by this call (false).
  1644  func (s *Int64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  1645  
  1646  	h := hashInt(uint64(val.(int64)), 0)
  1647  	e, ok := s.tbl.Lookup(h, func(v int64) bool {
  1648  		return val.(int64) == v
  1649  	})
  1650  
  1651  	if ok {
  1652  		idx = int(e.payload.memoIdx)
  1653  		found = true
  1654  	} else {
  1655  		idx = s.Size()
  1656  		s.tbl.Insert(e, h, val.(int64), int32(idx))
  1657  	}
  1658  	return
  1659  }
  1660  
// payloadUint32 is what an occupied slot stores: val is the uint32 value and
// memoIdx is the index recorded for it when it was inserted.
type payloadUint32 struct {
	val     uint32
	memoIdx int32
}
  1665  
// entryUint32 is a single slot of Uint32HashTable. h holds the slot's hash,
// with the sentinel value marking a slot that has not been filled, and
// payload holds the value/memo-index pair stored in the slot.
type entryUint32 struct {
	h       uint64
	payload payloadUint32
}
  1670  
  1671  func (e entryUint32) Valid() bool { return e.h != sentinel }
  1672  
// Uint32HashTable is a hashtable specifically for uint32 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
//
// cap is the number of slots in entries, capMask is cap-1, and size starts at
// 0 for a new or reset table.
type Uint32HashTable struct {
	cap     uint64
	capMask uint64
	size    uint64

	entries []entryUint32
}
  1683  
  1684  // NewUint32HashTable returns a new hash table for uint32 values
  1685  // initialized with the passed in capacity or 32 whichever is larger.
  1686  func NewUint32HashTable(cap uint64) *Uint32HashTable {
  1687  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1688  	ret := &Uint32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  1689  	ret.entries = make([]entryUint32, initCap)
  1690  	return ret
  1691  }
  1692  
  1693  // Reset drops all of the values in this hash table and re-initializes it
  1694  // with the specified initial capacity as if by calling New, but without having
  1695  // to reallocate the object.
  1696  func (h *Uint32HashTable) Reset(cap uint64) {
  1697  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1698  	h.capMask = h.cap - 1
  1699  	h.size = 0
  1700  	h.entries = make([]entryUint32, h.cap)
  1701  }
  1702  
  1703  // CopyValues is used for copying the values out of the hash table into the
  1704  // passed in slice, in the order that they were first inserted
  1705  func (h *Uint32HashTable) CopyValues(out []uint32) {
  1706  	h.CopyValuesSubset(0, out)
  1707  }
  1708  
  1709  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  1710  // with the value at start, in the order that they were inserted.
  1711  func (h *Uint32HashTable) CopyValuesSubset(start int, out []uint32) {
  1712  	h.VisitEntries(func(e *entryUint32) {
  1713  		idx := e.payload.memoIdx - int32(start)
  1714  		if idx >= 0 {
  1715  			out[idx] = e.payload.val
  1716  		}
  1717  	})
  1718  }
  1719  
  1720  func (h *Uint32HashTable) WriteOut(out []byte) {
  1721  	h.WriteOutSubset(0, out)
  1722  }
  1723  
  1724  func (h *Uint32HashTable) WriteOutSubset(start int, out []byte) {
  1725  	data := arrow.Uint32Traits.CastFromBytes(out)
  1726  	h.VisitEntries(func(e *entryUint32) {
  1727  		idx := e.payload.memoIdx - int32(start)
  1728  		if idx >= 0 {
  1729  			data[idx] = utils.ToLEUint32(e.payload.val)
  1730  		}
  1731  	})
  1732  }
  1733  
  1734  func (h *Uint32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  1735  
  1736  func (Uint32HashTable) fixHash(v uint64) uint64 {
  1737  	if v == sentinel {
  1738  		return 42
  1739  	}
  1740  	return v
  1741  }
  1742  
  1743  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  1744  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  1745  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  1746  func (h *Uint32HashTable) Lookup(v uint64, cmp func(uint32) bool) (*entryUint32, bool) {
  1747  	idx, ok := h.lookup(v, h.capMask, cmp)
  1748  	return &h.entries[idx], ok
  1749  }
  1750  
  1751  func (h *Uint32HashTable) lookup(v uint64, szMask uint64, cmp func(uint32) bool) (uint64, bool) {
  1752  	const perturbShift uint8 = 5
  1753  
  1754  	var (
  1755  		idx     uint64
  1756  		perturb uint64
  1757  		e       *entryUint32
  1758  	)
  1759  
  1760  	v = h.fixHash(v)
  1761  	idx = v & szMask
  1762  	perturb = (v >> uint64(perturbShift)) + 1
  1763  
  1764  	for {
  1765  		e = &h.entries[idx]
  1766  		if e.h == v && cmp(e.payload.val) {
  1767  			return idx, true
  1768  		}
  1769  
  1770  		if e.h == sentinel {
  1771  			return idx, false
  1772  		}
  1773  
  1774  		// perturbation logic inspired from CPython's set/dict object
  1775  		// the goal is that all 64 bits of unmasked hash value eventually
  1776  		// participate int he probing sequence, to minimize clustering
  1777  		idx = (idx + perturb) & szMask
  1778  		perturb = (perturb >> uint64(perturbShift)) + 1
  1779  	}
  1780  }
  1781  
  1782  func (h *Uint32HashTable) upsize(newcap uint64) error {
  1783  	newMask := newcap - 1
  1784  
  1785  	oldEntries := h.entries
  1786  	h.entries = make([]entryUint32, newcap)
  1787  	for _, e := range oldEntries {
  1788  		if e.Valid() {
  1789  			idx, _ := h.lookup(e.h, newMask, func(uint32) bool { return false })
  1790  			h.entries[idx] = e
  1791  		}
  1792  	}
  1793  	h.cap = newcap
  1794  	h.capMask = newMask
  1795  	return nil
  1796  }
  1797  
  1798  // Insert updates the given entry with the provided hash value, payload value and memo index.
  1799  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  1800  func (h *Uint32HashTable) Insert(e *entryUint32, v uint64, val uint32, memoIdx int32) error {
  1801  	e.h = h.fixHash(v)
  1802  	e.payload.val = val
  1803  	e.payload.memoIdx = memoIdx
  1804  	h.size++
  1805  
  1806  	if h.needUpsize() {
  1807  		h.upsize(h.cap * uint64(loadFactor) * 2)
  1808  	}
  1809  	return nil
  1810  }
  1811  
  1812  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  1813  // a valid entry being one which has had a value inserted into it.
  1814  func (h *Uint32HashTable) VisitEntries(visit func(*entryUint32)) {
  1815  	for _, e := range h.entries {
  1816  		if e.Valid() {
  1817  			visit(&e)
  1818  		}
  1819  	}
  1820  }
  1821  
// Uint32MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Uint32MemoTable struct {
	// tbl holds the non-null values.
	tbl     *Uint32HashTable
	// nullIdx is the memo index given to null, or KeyNotFound while no null
	// has been inserted.
	nullIdx int32
}
  1829  
  1830  // NewUint32MemoTable returns a new memotable with num entries pre-allocated to reduce further
  1831  // allocations when inserting.
  1832  func NewUint32MemoTable(num int64) *Uint32MemoTable {
  1833  	return &Uint32MemoTable{tbl: NewUint32HashTable(uint64(num)), nullIdx: KeyNotFound}
  1834  }
  1835  
  1836  func (Uint32MemoTable) TypeTraits() TypeTraits {
  1837  	return arrow.Uint32Traits
  1838  }
  1839  
  1840  // Reset allows this table to be re-used by dumping all the data currently in the table.
  1841  func (s *Uint32MemoTable) Reset() {
  1842  	s.tbl.Reset(32)
  1843  	s.nullIdx = KeyNotFound
  1844  }
  1845  
  1846  // Size returns the current number of inserted elements into the table including if a null
  1847  // has been inserted.
  1848  func (s *Uint32MemoTable) Size() int {
  1849  	sz := int(s.tbl.size)
  1850  	if _, ok := s.GetNull(); ok {
  1851  		sz++
  1852  	}
  1853  	return sz
  1854  }
  1855  
  1856  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  1857  // that will be true if found and false if not.
  1858  func (s *Uint32MemoTable) GetNull() (int, bool) {
  1859  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  1860  }
  1861  
  1862  // GetOrInsertNull will return the index of the null entry or insert a null entry
  1863  // if one currently doesn't exist. The found value will be true if there was already
  1864  // a null in the table, and false if it inserted one.
  1865  func (s *Uint32MemoTable) GetOrInsertNull() (idx int, found bool) {
  1866  	idx, found = s.GetNull()
  1867  	if !found {
  1868  		idx = s.Size()
  1869  		s.nullIdx = int32(idx)
  1870  	}
  1871  	return
  1872  }
  1873  
  1874  // CopyValues will copy the values from the memo table out into the passed in slice
  1875  // which must be of the appropriate type.
  1876  func (s *Uint32MemoTable) CopyValues(out interface{}) {
  1877  	s.CopyValuesSubset(0, out)
  1878  }
  1879  
  1880  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  1881  // at the provided start index
  1882  func (s *Uint32MemoTable) CopyValuesSubset(start int, out interface{}) {
  1883  	s.tbl.CopyValuesSubset(start, out.([]uint32))
  1884  }
  1885  
  1886  func (s *Uint32MemoTable) WriteOut(out []byte) {
  1887  	s.tbl.CopyValues(arrow.Uint32Traits.CastFromBytes(out))
  1888  }
  1889  
  1890  func (s *Uint32MemoTable) WriteOutSubset(start int, out []byte) {
  1891  	s.tbl.CopyValuesSubset(start, arrow.Uint32Traits.CastFromBytes(out))
  1892  }
  1893  
  1894  func (s *Uint32MemoTable) WriteOutLE(out []byte) {
  1895  	s.tbl.WriteOut(out)
  1896  }
  1897  
  1898  func (s *Uint32MemoTable) WriteOutSubsetLE(start int, out []byte) {
  1899  	s.tbl.WriteOutSubset(start, out)
  1900  }
  1901  
  1902  // Get returns the index of the requested value in the hash table or KeyNotFound
  1903  // along with a boolean indicating if it was found or not.
  1904  func (s *Uint32MemoTable) Get(val interface{}) (int, bool) {
  1905  
  1906  	h := hashInt(uint64(val.(uint32)), 0)
  1907  	if e, ok := s.tbl.Lookup(h, func(v uint32) bool { return val.(uint32) == v }); ok {
  1908  		return int(e.payload.memoIdx), ok
  1909  	}
  1910  	return KeyNotFound, false
  1911  }
  1912  
  1913  // GetOrInsert will return the index of the specified value in the table, or insert the
  1914  // value into the table and return the new index. found indicates whether or not it already
  1915  // existed in the table (true) or was inserted by this call (false).
  1916  func (s *Uint32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  1917  
  1918  	h := hashInt(uint64(val.(uint32)), 0)
  1919  	e, ok := s.tbl.Lookup(h, func(v uint32) bool {
  1920  		return val.(uint32) == v
  1921  	})
  1922  
  1923  	if ok {
  1924  		idx = int(e.payload.memoIdx)
  1925  		found = true
  1926  	} else {
  1927  		idx = s.Size()
  1928  		s.tbl.Insert(e, h, val.(uint32), int32(idx))
  1929  	}
  1930  	return
  1931  }
  1932  
// payloadUint64 pairs a stored uint64 value with the memo index it was given
// when it was inserted.
type payloadUint64 struct {
	// val is the value held in the slot.
	val     uint64
	// memoIdx is the index recorded for val at insertion time.
	memoIdx int32
}
  1937  
// entryUint64 is a single slot of a Uint64HashTable.
type entryUint64 struct {
	// h is the hash stored for the slot; a slot whose h equals sentinel is
	// treated as unoccupied (see Valid).
	h       uint64
	// payload carries the value and memo index stored in the slot.
	payload payloadUint64
}
  1942  
  1943  func (e entryUint64) Valid() bool { return e.h != sentinel }
  1944  
// Uint64HashTable is a hashtable specifically for uint64 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
type Uint64HashTable struct {
	// cap is the number of slots in entries; the constructor and Reset
	// derive it via bitutil.NextPowerOf2.
	cap     uint64
	// capMask is cap-1 and is used to reduce a hash to a slot index.
	capMask uint64
	// size counts the values inserted so far.
	size    uint64

	// entries is the slot storage probed by lookup.
	entries []entryUint64
}
  1955  
  1956  // NewUint64HashTable returns a new hash table for uint64 values
  1957  // initialized with the passed in capacity or 32 whichever is larger.
  1958  func NewUint64HashTable(cap uint64) *Uint64HashTable {
  1959  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1960  	ret := &Uint64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  1961  	ret.entries = make([]entryUint64, initCap)
  1962  	return ret
  1963  }
  1964  
  1965  // Reset drops all of the values in this hash table and re-initializes it
  1966  // with the specified initial capacity as if by calling New, but without having
  1967  // to reallocate the object.
  1968  func (h *Uint64HashTable) Reset(cap uint64) {
  1969  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  1970  	h.capMask = h.cap - 1
  1971  	h.size = 0
  1972  	h.entries = make([]entryUint64, h.cap)
  1973  }
  1974  
  1975  // CopyValues is used for copying the values out of the hash table into the
  1976  // passed in slice, in the order that they were first inserted
  1977  func (h *Uint64HashTable) CopyValues(out []uint64) {
  1978  	h.CopyValuesSubset(0, out)
  1979  }
  1980  
  1981  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  1982  // with the value at start, in the order that they were inserted.
  1983  func (h *Uint64HashTable) CopyValuesSubset(start int, out []uint64) {
  1984  	h.VisitEntries(func(e *entryUint64) {
  1985  		idx := e.payload.memoIdx - int32(start)
  1986  		if idx >= 0 {
  1987  			out[idx] = e.payload.val
  1988  		}
  1989  	})
  1990  }
  1991  
  1992  func (h *Uint64HashTable) WriteOut(out []byte) {
  1993  	h.WriteOutSubset(0, out)
  1994  }
  1995  
  1996  func (h *Uint64HashTable) WriteOutSubset(start int, out []byte) {
  1997  	data := arrow.Uint64Traits.CastFromBytes(out)
  1998  	h.VisitEntries(func(e *entryUint64) {
  1999  		idx := e.payload.memoIdx - int32(start)
  2000  		if idx >= 0 {
  2001  			data[idx] = utils.ToLEUint64(e.payload.val)
  2002  		}
  2003  	})
  2004  }
  2005  
  2006  func (h *Uint64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  2007  
  2008  func (Uint64HashTable) fixHash(v uint64) uint64 {
  2009  	if v == sentinel {
  2010  		return 42
  2011  	}
  2012  	return v
  2013  }
  2014  
  2015  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  2016  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  2017  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  2018  func (h *Uint64HashTable) Lookup(v uint64, cmp func(uint64) bool) (*entryUint64, bool) {
  2019  	idx, ok := h.lookup(v, h.capMask, cmp)
  2020  	return &h.entries[idx], ok
  2021  }
  2022  
  2023  func (h *Uint64HashTable) lookup(v uint64, szMask uint64, cmp func(uint64) bool) (uint64, bool) {
  2024  	const perturbShift uint8 = 5
  2025  
  2026  	var (
  2027  		idx     uint64
  2028  		perturb uint64
  2029  		e       *entryUint64
  2030  	)
  2031  
  2032  	v = h.fixHash(v)
  2033  	idx = v & szMask
  2034  	perturb = (v >> uint64(perturbShift)) + 1
  2035  
  2036  	for {
  2037  		e = &h.entries[idx]
  2038  		if e.h == v && cmp(e.payload.val) {
  2039  			return idx, true
  2040  		}
  2041  
  2042  		if e.h == sentinel {
  2043  			return idx, false
  2044  		}
  2045  
  2046  		// perturbation logic inspired from CPython's set/dict object
  2047  		// the goal is that all 64 bits of unmasked hash value eventually
  2048  		// participate int he probing sequence, to minimize clustering
  2049  		idx = (idx + perturb) & szMask
  2050  		perturb = (perturb >> uint64(perturbShift)) + 1
  2051  	}
  2052  }
  2053  
  2054  func (h *Uint64HashTable) upsize(newcap uint64) error {
  2055  	newMask := newcap - 1
  2056  
  2057  	oldEntries := h.entries
  2058  	h.entries = make([]entryUint64, newcap)
  2059  	for _, e := range oldEntries {
  2060  		if e.Valid() {
  2061  			idx, _ := h.lookup(e.h, newMask, func(uint64) bool { return false })
  2062  			h.entries[idx] = e
  2063  		}
  2064  	}
  2065  	h.cap = newcap
  2066  	h.capMask = newMask
  2067  	return nil
  2068  }
  2069  
  2070  // Insert updates the given entry with the provided hash value, payload value and memo index.
  2071  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  2072  func (h *Uint64HashTable) Insert(e *entryUint64, v uint64, val uint64, memoIdx int32) error {
  2073  	e.h = h.fixHash(v)
  2074  	e.payload.val = val
  2075  	e.payload.memoIdx = memoIdx
  2076  	h.size++
  2077  
  2078  	if h.needUpsize() {
  2079  		h.upsize(h.cap * uint64(loadFactor) * 2)
  2080  	}
  2081  	return nil
  2082  }
  2083  
  2084  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  2085  // a valid entry being one which has had a value inserted into it.
  2086  func (h *Uint64HashTable) VisitEntries(visit func(*entryUint64)) {
  2087  	for _, e := range h.entries {
  2088  		if e.Valid() {
  2089  			visit(&e)
  2090  		}
  2091  	}
  2092  }
  2093  
// Uint64MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Uint64MemoTable struct {
	// tbl holds the non-null values.
	tbl     *Uint64HashTable
	// nullIdx is the memo index given to null, or KeyNotFound while no null
	// has been inserted.
	nullIdx int32
}
  2101  
  2102  // NewUint64MemoTable returns a new memotable with num entries pre-allocated to reduce further
  2103  // allocations when inserting.
  2104  func NewUint64MemoTable(num int64) *Uint64MemoTable {
  2105  	return &Uint64MemoTable{tbl: NewUint64HashTable(uint64(num)), nullIdx: KeyNotFound}
  2106  }
  2107  
  2108  func (Uint64MemoTable) TypeTraits() TypeTraits {
  2109  	return arrow.Uint64Traits
  2110  }
  2111  
  2112  // Reset allows this table to be re-used by dumping all the data currently in the table.
  2113  func (s *Uint64MemoTable) Reset() {
  2114  	s.tbl.Reset(32)
  2115  	s.nullIdx = KeyNotFound
  2116  }
  2117  
  2118  // Size returns the current number of inserted elements into the table including if a null
  2119  // has been inserted.
  2120  func (s *Uint64MemoTable) Size() int {
  2121  	sz := int(s.tbl.size)
  2122  	if _, ok := s.GetNull(); ok {
  2123  		sz++
  2124  	}
  2125  	return sz
  2126  }
  2127  
  2128  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  2129  // that will be true if found and false if not.
  2130  func (s *Uint64MemoTable) GetNull() (int, bool) {
  2131  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  2132  }
  2133  
  2134  // GetOrInsertNull will return the index of the null entry or insert a null entry
  2135  // if one currently doesn't exist. The found value will be true if there was already
  2136  // a null in the table, and false if it inserted one.
  2137  func (s *Uint64MemoTable) GetOrInsertNull() (idx int, found bool) {
  2138  	idx, found = s.GetNull()
  2139  	if !found {
  2140  		idx = s.Size()
  2141  		s.nullIdx = int32(idx)
  2142  	}
  2143  	return
  2144  }
  2145  
  2146  // CopyValues will copy the values from the memo table out into the passed in slice
  2147  // which must be of the appropriate type.
  2148  func (s *Uint64MemoTable) CopyValues(out interface{}) {
  2149  	s.CopyValuesSubset(0, out)
  2150  }
  2151  
  2152  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  2153  // at the provided start index
  2154  func (s *Uint64MemoTable) CopyValuesSubset(start int, out interface{}) {
  2155  	s.tbl.CopyValuesSubset(start, out.([]uint64))
  2156  }
  2157  
  2158  func (s *Uint64MemoTable) WriteOut(out []byte) {
  2159  	s.tbl.CopyValues(arrow.Uint64Traits.CastFromBytes(out))
  2160  }
  2161  
  2162  func (s *Uint64MemoTable) WriteOutSubset(start int, out []byte) {
  2163  	s.tbl.CopyValuesSubset(start, arrow.Uint64Traits.CastFromBytes(out))
  2164  }
  2165  
  2166  func (s *Uint64MemoTable) WriteOutLE(out []byte) {
  2167  	s.tbl.WriteOut(out)
  2168  }
  2169  
  2170  func (s *Uint64MemoTable) WriteOutSubsetLE(start int, out []byte) {
  2171  	s.tbl.WriteOutSubset(start, out)
  2172  }
  2173  
  2174  // Get returns the index of the requested value in the hash table or KeyNotFound
  2175  // along with a boolean indicating if it was found or not.
  2176  func (s *Uint64MemoTable) Get(val interface{}) (int, bool) {
  2177  
  2178  	h := hashInt(uint64(val.(uint64)), 0)
  2179  	if e, ok := s.tbl.Lookup(h, func(v uint64) bool { return val.(uint64) == v }); ok {
  2180  		return int(e.payload.memoIdx), ok
  2181  	}
  2182  	return KeyNotFound, false
  2183  }
  2184  
  2185  // GetOrInsert will return the index of the specified value in the table, or insert the
  2186  // value into the table and return the new index. found indicates whether or not it already
  2187  // existed in the table (true) or was inserted by this call (false).
  2188  func (s *Uint64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  2189  
  2190  	h := hashInt(uint64(val.(uint64)), 0)
  2191  	e, ok := s.tbl.Lookup(h, func(v uint64) bool {
  2192  		return val.(uint64) == v
  2193  	})
  2194  
  2195  	if ok {
  2196  		idx = int(e.payload.memoIdx)
  2197  		found = true
  2198  	} else {
  2199  		idx = s.Size()
  2200  		s.tbl.Insert(e, h, val.(uint64), int32(idx))
  2201  	}
  2202  	return
  2203  }
  2204  
// payloadFloat32 pairs a stored float32 value with the memo index it was given
// when it was inserted.
type payloadFloat32 struct {
	// val is the value held in the slot.
	val     float32
	// memoIdx is the index recorded for val at insertion time.
	memoIdx int32
}
  2209  
// entryFloat32 is a single slot of a Float32HashTable.
type entryFloat32 struct {
	// h is the hash stored for the slot; a slot whose h equals sentinel is
	// treated as unoccupied (see Valid).
	h       uint64
	// payload carries the value and memo index stored in the slot.
	payload payloadFloat32
}
  2214  
  2215  func (e entryFloat32) Valid() bool { return e.h != sentinel }
  2216  
// Float32HashTable is a hashtable specifically for float32 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
type Float32HashTable struct {
	// cap is the number of slots in entries; the constructor and Reset
	// derive it via bitutil.NextPowerOf2.
	cap     uint64
	// capMask is cap-1 and is used to reduce a hash to a slot index.
	capMask uint64
	// size counts the values inserted so far.
	size    uint64

	// entries is the slot storage probed by lookup.
	entries []entryFloat32
}
  2227  
  2228  // NewFloat32HashTable returns a new hash table for float32 values
  2229  // initialized with the passed in capacity or 32 whichever is larger.
  2230  func NewFloat32HashTable(cap uint64) *Float32HashTable {
  2231  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  2232  	ret := &Float32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  2233  	ret.entries = make([]entryFloat32, initCap)
  2234  	return ret
  2235  }
  2236  
  2237  // Reset drops all of the values in this hash table and re-initializes it
  2238  // with the specified initial capacity as if by calling New, but without having
  2239  // to reallocate the object.
  2240  func (h *Float32HashTable) Reset(cap uint64) {
  2241  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  2242  	h.capMask = h.cap - 1
  2243  	h.size = 0
  2244  	h.entries = make([]entryFloat32, h.cap)
  2245  }
  2246  
  2247  // CopyValues is used for copying the values out of the hash table into the
  2248  // passed in slice, in the order that they were first inserted
  2249  func (h *Float32HashTable) CopyValues(out []float32) {
  2250  	h.CopyValuesSubset(0, out)
  2251  }
  2252  
  2253  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  2254  // with the value at start, in the order that they were inserted.
  2255  func (h *Float32HashTable) CopyValuesSubset(start int, out []float32) {
  2256  	h.VisitEntries(func(e *entryFloat32) {
  2257  		idx := e.payload.memoIdx - int32(start)
  2258  		if idx >= 0 {
  2259  			out[idx] = e.payload.val
  2260  		}
  2261  	})
  2262  }
  2263  
  2264  func (h *Float32HashTable) WriteOut(out []byte) {
  2265  	h.WriteOutSubset(0, out)
  2266  }
  2267  
  2268  func (h *Float32HashTable) WriteOutSubset(start int, out []byte) {
  2269  	data := arrow.Float32Traits.CastFromBytes(out)
  2270  	h.VisitEntries(func(e *entryFloat32) {
  2271  		idx := e.payload.memoIdx - int32(start)
  2272  		if idx >= 0 {
  2273  			data[idx] = utils.ToLEFloat32(e.payload.val)
  2274  		}
  2275  	})
  2276  }
  2277  
  2278  func (h *Float32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  2279  
  2280  func (Float32HashTable) fixHash(v uint64) uint64 {
  2281  	if v == sentinel {
  2282  		return 42
  2283  	}
  2284  	return v
  2285  }
  2286  
  2287  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  2288  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  2289  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  2290  func (h *Float32HashTable) Lookup(v uint64, cmp func(float32) bool) (*entryFloat32, bool) {
  2291  	idx, ok := h.lookup(v, h.capMask, cmp)
  2292  	return &h.entries[idx], ok
  2293  }
  2294  
  2295  func (h *Float32HashTable) lookup(v uint64, szMask uint64, cmp func(float32) bool) (uint64, bool) {
  2296  	const perturbShift uint8 = 5
  2297  
  2298  	var (
  2299  		idx     uint64
  2300  		perturb uint64
  2301  		e       *entryFloat32
  2302  	)
  2303  
  2304  	v = h.fixHash(v)
  2305  	idx = v & szMask
  2306  	perturb = (v >> uint64(perturbShift)) + 1
  2307  
  2308  	for {
  2309  		e = &h.entries[idx]
  2310  		if e.h == v && cmp(e.payload.val) {
  2311  			return idx, true
  2312  		}
  2313  
  2314  		if e.h == sentinel {
  2315  			return idx, false
  2316  		}
  2317  
  2318  		// perturbation logic inspired from CPython's set/dict object
  2319  		// the goal is that all 64 bits of unmasked hash value eventually
  2320  		// participate int he probing sequence, to minimize clustering
  2321  		idx = (idx + perturb) & szMask
  2322  		perturb = (perturb >> uint64(perturbShift)) + 1
  2323  	}
  2324  }
  2325  
  2326  func (h *Float32HashTable) upsize(newcap uint64) error {
  2327  	newMask := newcap - 1
  2328  
  2329  	oldEntries := h.entries
  2330  	h.entries = make([]entryFloat32, newcap)
  2331  	for _, e := range oldEntries {
  2332  		if e.Valid() {
  2333  			idx, _ := h.lookup(e.h, newMask, func(float32) bool { return false })
  2334  			h.entries[idx] = e
  2335  		}
  2336  	}
  2337  	h.cap = newcap
  2338  	h.capMask = newMask
  2339  	return nil
  2340  }
  2341  
  2342  // Insert updates the given entry with the provided hash value, payload value and memo index.
  2343  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  2344  func (h *Float32HashTable) Insert(e *entryFloat32, v uint64, val float32, memoIdx int32) error {
  2345  	e.h = h.fixHash(v)
  2346  	e.payload.val = val
  2347  	e.payload.memoIdx = memoIdx
  2348  	h.size++
  2349  
  2350  	if h.needUpsize() {
  2351  		h.upsize(h.cap * uint64(loadFactor) * 2)
  2352  	}
  2353  	return nil
  2354  }
  2355  
  2356  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  2357  // a valid entry being one which has had a value inserted into it.
  2358  func (h *Float32HashTable) VisitEntries(visit func(*entryFloat32)) {
  2359  	for _, e := range h.entries {
  2360  		if e.Valid() {
  2361  			visit(&e)
  2362  		}
  2363  	}
  2364  }
  2365  
// Float32MemoTable is a wrapper over the appropriate hashtable to provide an interface
// conforming to the MemoTable interface defined in the encoding package for general interactions
// regarding dictionaries.
type Float32MemoTable struct {
	// tbl holds the non-null values.
	tbl     *Float32HashTable
	// nullIdx is the memo index given to null, or KeyNotFound while no null
	// has been inserted.
	nullIdx int32
}
  2373  
  2374  // NewFloat32MemoTable returns a new memotable with num entries pre-allocated to reduce further
  2375  // allocations when inserting.
  2376  func NewFloat32MemoTable(num int64) *Float32MemoTable {
  2377  	return &Float32MemoTable{tbl: NewFloat32HashTable(uint64(num)), nullIdx: KeyNotFound}
  2378  }
  2379  
  2380  func (Float32MemoTable) TypeTraits() TypeTraits {
  2381  	return arrow.Float32Traits
  2382  }
  2383  
  2384  // Reset allows this table to be re-used by dumping all the data currently in the table.
  2385  func (s *Float32MemoTable) Reset() {
  2386  	s.tbl.Reset(32)
  2387  	s.nullIdx = KeyNotFound
  2388  }
  2389  
  2390  // Size returns the current number of inserted elements into the table including if a null
  2391  // has been inserted.
  2392  func (s *Float32MemoTable) Size() int {
  2393  	sz := int(s.tbl.size)
  2394  	if _, ok := s.GetNull(); ok {
  2395  		sz++
  2396  	}
  2397  	return sz
  2398  }
  2399  
  2400  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  2401  // that will be true if found and false if not.
  2402  func (s *Float32MemoTable) GetNull() (int, bool) {
  2403  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  2404  }
  2405  
  2406  // GetOrInsertNull will return the index of the null entry or insert a null entry
  2407  // if one currently doesn't exist. The found value will be true if there was already
  2408  // a null in the table, and false if it inserted one.
  2409  func (s *Float32MemoTable) GetOrInsertNull() (idx int, found bool) {
  2410  	idx, found = s.GetNull()
  2411  	if !found {
  2412  		idx = s.Size()
  2413  		s.nullIdx = int32(idx)
  2414  	}
  2415  	return
  2416  }
  2417  
  2418  // CopyValues will copy the values from the memo table out into the passed in slice
  2419  // which must be of the appropriate type.
  2420  func (s *Float32MemoTable) CopyValues(out interface{}) {
  2421  	s.CopyValuesSubset(0, out)
  2422  }
  2423  
  2424  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  2425  // at the provided start index
  2426  func (s *Float32MemoTable) CopyValuesSubset(start int, out interface{}) {
  2427  	s.tbl.CopyValuesSubset(start, out.([]float32))
  2428  }
  2429  
  2430  func (s *Float32MemoTable) WriteOut(out []byte) {
  2431  	s.tbl.CopyValues(arrow.Float32Traits.CastFromBytes(out))
  2432  }
  2433  
  2434  func (s *Float32MemoTable) WriteOutSubset(start int, out []byte) {
  2435  	s.tbl.CopyValuesSubset(start, arrow.Float32Traits.CastFromBytes(out))
  2436  }
  2437  
  2438  func (s *Float32MemoTable) WriteOutLE(out []byte) {
  2439  	s.tbl.WriteOut(out)
  2440  }
  2441  
  2442  func (s *Float32MemoTable) WriteOutSubsetLE(start int, out []byte) {
  2443  	s.tbl.WriteOutSubset(start, out)
  2444  }
  2445  
  2446  // Get returns the index of the requested value in the hash table or KeyNotFound
  2447  // along with a boolean indicating if it was found or not.
  2448  func (s *Float32MemoTable) Get(val interface{}) (int, bool) {
  2449  	var cmp func(float32) bool
  2450  
  2451  	if math.IsNaN(float64(val.(float32))) {
  2452  		cmp = isNan32Cmp
  2453  		// use consistent internal bit pattern for NaN regardless of the pattern
  2454  		// that is passed to us. NaN is NaN is NaN
  2455  		val = float32(math.NaN())
  2456  	} else {
  2457  		cmp = func(v float32) bool { return val.(float32) == v }
  2458  	}
  2459  
  2460  	h := hashFloat32(val.(float32), 0)
  2461  	if e, ok := s.tbl.Lookup(h, cmp); ok {
  2462  		return int(e.payload.memoIdx), ok
  2463  	}
  2464  	return KeyNotFound, false
  2465  }
  2466  
  2467  // GetOrInsert will return the index of the specified value in the table, or insert the
  2468  // value into the table and return the new index. found indicates whether or not it already
  2469  // existed in the table (true) or was inserted by this call (false).
  2470  func (s *Float32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  2471  
  2472  	var cmp func(float32) bool
  2473  
  2474  	if math.IsNaN(float64(val.(float32))) {
  2475  		cmp = isNan32Cmp
  2476  		// use consistent internal bit pattern for NaN regardless of the pattern
  2477  		// that is passed to us. NaN is NaN is NaN
  2478  		val = float32(math.NaN())
  2479  	} else {
  2480  		cmp = func(v float32) bool { return val.(float32) == v }
  2481  	}
  2482  
  2483  	h := hashFloat32(val.(float32), 0)
  2484  	e, ok := s.tbl.Lookup(h, cmp)
  2485  
  2486  	if ok {
  2487  		idx = int(e.payload.memoIdx)
  2488  		found = true
  2489  	} else {
  2490  		idx = s.Size()
  2491  		s.tbl.Insert(e, h, val.(float32), int32(idx))
  2492  	}
  2493  	return
  2494  }
  2495  
// payloadFloat64 pairs a stored float64 value with a memo index.
type payloadFloat64 struct {
	// val is the value held in the slot.
	val     float64
	// memoIdx is the index associated with val; the copy routines use it as
	// the output position.
	memoIdx int32
}
  2500  
// entryFloat64 is a single slot of a Float64HashTable.
type entryFloat64 struct {
	// h is the hash stored for the slot; a slot whose h equals sentinel is
	// treated as unoccupied (see Valid).
	h       uint64
	// payload carries the value and memo index stored in the slot.
	payload payloadFloat64
}
  2505  
  2506  func (e entryFloat64) Valid() bool { return e.h != sentinel }
  2507  
// Float64HashTable is a hashtable specifically for float64 that
// is utilized with the MemoTable to generalize interactions for easier
// implementation of dictionaries without losing performance.
type Float64HashTable struct {
	// cap is the number of slots in entries; the constructor and Reset
	// derive it via bitutil.NextPowerOf2.
	cap     uint64
	// capMask is cap-1; Lookup passes it to lookup as the index mask.
	capMask uint64
	// size is set to zero by the constructor and Reset.
	// NOTE(review): presumably the count of inserted values — confirm
	// against Insert.
	size    uint64

	// entries is the slot storage.
	entries []entryFloat64
}
  2518  
  2519  // NewFloat64HashTable returns a new hash table for float64 values
  2520  // initialized with the passed in capacity or 32 whichever is larger.
  2521  func NewFloat64HashTable(cap uint64) *Float64HashTable {
  2522  	initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  2523  	ret := &Float64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
  2524  	ret.entries = make([]entryFloat64, initCap)
  2525  	return ret
  2526  }
  2527  
  2528  // Reset drops all of the values in this hash table and re-initializes it
  2529  // with the specified initial capacity as if by calling New, but without having
  2530  // to reallocate the object.
  2531  func (h *Float64HashTable) Reset(cap uint64) {
  2532  	h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
  2533  	h.capMask = h.cap - 1
  2534  	h.size = 0
  2535  	h.entries = make([]entryFloat64, h.cap)
  2536  }
  2537  
  2538  // CopyValues is used for copying the values out of the hash table into the
  2539  // passed in slice, in the order that they were first inserted
  2540  func (h *Float64HashTable) CopyValues(out []float64) {
  2541  	h.CopyValuesSubset(0, out)
  2542  }
  2543  
  2544  // CopyValuesSubset copies a subset of the values in the hashtable out, starting
  2545  // with the value at start, in the order that they were inserted.
  2546  func (h *Float64HashTable) CopyValuesSubset(start int, out []float64) {
  2547  	h.VisitEntries(func(e *entryFloat64) {
  2548  		idx := e.payload.memoIdx - int32(start)
  2549  		if idx >= 0 {
  2550  			out[idx] = e.payload.val
  2551  		}
  2552  	})
  2553  }
  2554  
  2555  func (h *Float64HashTable) WriteOut(out []byte) {
  2556  	h.WriteOutSubset(0, out)
  2557  }
  2558  
  2559  func (h *Float64HashTable) WriteOutSubset(start int, out []byte) {
  2560  	data := arrow.Float64Traits.CastFromBytes(out)
  2561  	h.VisitEntries(func(e *entryFloat64) {
  2562  		idx := e.payload.memoIdx - int32(start)
  2563  		if idx >= 0 {
  2564  			data[idx] = utils.ToLEFloat64(e.payload.val)
  2565  		}
  2566  	})
  2567  }
  2568  
  2569  func (h *Float64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
  2570  
  2571  func (Float64HashTable) fixHash(v uint64) uint64 {
  2572  	if v == sentinel {
  2573  		return 42
  2574  	}
  2575  	return v
  2576  }
  2577  
  2578  // Lookup retrieves the entry for a given hash value assuming it's payload value returns
  2579  // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
  2580  // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
  2581  func (h *Float64HashTable) Lookup(v uint64, cmp func(float64) bool) (*entryFloat64, bool) {
  2582  	idx, ok := h.lookup(v, h.capMask, cmp)
  2583  	return &h.entries[idx], ok
  2584  }
  2585  
  2586  func (h *Float64HashTable) lookup(v uint64, szMask uint64, cmp func(float64) bool) (uint64, bool) {
  2587  	const perturbShift uint8 = 5
  2588  
  2589  	var (
  2590  		idx     uint64
  2591  		perturb uint64
  2592  		e       *entryFloat64
  2593  	)
  2594  
  2595  	v = h.fixHash(v)
  2596  	idx = v & szMask
  2597  	perturb = (v >> uint64(perturbShift)) + 1
  2598  
  2599  	for {
  2600  		e = &h.entries[idx]
  2601  		if e.h == v && cmp(e.payload.val) {
  2602  			return idx, true
  2603  		}
  2604  
  2605  		if e.h == sentinel {
  2606  			return idx, false
  2607  		}
  2608  
  2609  		// perturbation logic inspired from CPython's set/dict object
  2610  		// the goal is that all 64 bits of unmasked hash value eventually
  2611  		// participate int he probing sequence, to minimize clustering
  2612  		idx = (idx + perturb) & szMask
  2613  		perturb = (perturb >> uint64(perturbShift)) + 1
  2614  	}
  2615  }
  2616  
  2617  func (h *Float64HashTable) upsize(newcap uint64) error {
  2618  	newMask := newcap - 1
  2619  
  2620  	oldEntries := h.entries
  2621  	h.entries = make([]entryFloat64, newcap)
  2622  	for _, e := range oldEntries {
  2623  		if e.Valid() {
  2624  			idx, _ := h.lookup(e.h, newMask, func(float64) bool { return false })
  2625  			h.entries[idx] = e
  2626  		}
  2627  	}
  2628  	h.cap = newcap
  2629  	h.capMask = newMask
  2630  	return nil
  2631  }
  2632  
  2633  // Insert updates the given entry with the provided hash value, payload value and memo index.
  2634  // The entry pointer must have been retrieved via lookup in order to actually insert properly.
  2635  func (h *Float64HashTable) Insert(e *entryFloat64, v uint64, val float64, memoIdx int32) error {
  2636  	e.h = h.fixHash(v)
  2637  	e.payload.val = val
  2638  	e.payload.memoIdx = memoIdx
  2639  	h.size++
  2640  
  2641  	if h.needUpsize() {
  2642  		h.upsize(h.cap * uint64(loadFactor) * 2)
  2643  	}
  2644  	return nil
  2645  }
  2646  
  2647  // VisitEntries will call the passed in function on each *valid* entry in the hash table,
  2648  // a valid entry being one which has had a value inserted into it.
  2649  func (h *Float64HashTable) VisitEntries(visit func(*entryFloat64)) {
  2650  	for _, e := range h.entries {
  2651  		if e.Valid() {
  2652  			visit(&e)
  2653  		}
  2654  	}
  2655  }
  2656  
  2657  // Float64MemoTable is a wrapper over the appropriate hashtable to provide an interface
  2658  // conforming to the MemoTable interface defined in the encoding package for general interactions
  2659  // regarding dictionaries.
  2660  type Float64MemoTable struct {
  2661  	tbl     *Float64HashTable
  2662  	nullIdx int32
  2663  }
  2664  
  2665  // NewFloat64MemoTable returns a new memotable with num entries pre-allocated to reduce further
  2666  // allocations when inserting.
  2667  func NewFloat64MemoTable(num int64) *Float64MemoTable {
  2668  	return &Float64MemoTable{tbl: NewFloat64HashTable(uint64(num)), nullIdx: KeyNotFound}
  2669  }
  2670  
  2671  func (Float64MemoTable) TypeTraits() TypeTraits {
  2672  	return arrow.Float64Traits
  2673  }
  2674  
  2675  // Reset allows this table to be re-used by dumping all the data currently in the table.
  2676  func (s *Float64MemoTable) Reset() {
  2677  	s.tbl.Reset(32)
  2678  	s.nullIdx = KeyNotFound
  2679  }
  2680  
  2681  // Size returns the current number of inserted elements into the table including if a null
  2682  // has been inserted.
  2683  func (s *Float64MemoTable) Size() int {
  2684  	sz := int(s.tbl.size)
  2685  	if _, ok := s.GetNull(); ok {
  2686  		sz++
  2687  	}
  2688  	return sz
  2689  }
  2690  
  2691  // GetNull returns the index of an inserted null or KeyNotFound along with a bool
  2692  // that will be true if found and false if not.
  2693  func (s *Float64MemoTable) GetNull() (int, bool) {
  2694  	return int(s.nullIdx), s.nullIdx != KeyNotFound
  2695  }
  2696  
  2697  // GetOrInsertNull will return the index of the null entry or insert a null entry
  2698  // if one currently doesn't exist. The found value will be true if there was already
  2699  // a null in the table, and false if it inserted one.
  2700  func (s *Float64MemoTable) GetOrInsertNull() (idx int, found bool) {
  2701  	idx, found = s.GetNull()
  2702  	if !found {
  2703  		idx = s.Size()
  2704  		s.nullIdx = int32(idx)
  2705  	}
  2706  	return
  2707  }
  2708  
  2709  // CopyValues will copy the values from the memo table out into the passed in slice
  2710  // which must be of the appropriate type.
  2711  func (s *Float64MemoTable) CopyValues(out interface{}) {
  2712  	s.CopyValuesSubset(0, out)
  2713  }
  2714  
  2715  // CopyValuesSubset is like CopyValues but only copies a subset of values starting
  2716  // at the provided start index
  2717  func (s *Float64MemoTable) CopyValuesSubset(start int, out interface{}) {
  2718  	s.tbl.CopyValuesSubset(start, out.([]float64))
  2719  }
  2720  
  2721  func (s *Float64MemoTable) WriteOut(out []byte) {
  2722  	s.tbl.CopyValues(arrow.Float64Traits.CastFromBytes(out))
  2723  }
  2724  
  2725  func (s *Float64MemoTable) WriteOutSubset(start int, out []byte) {
  2726  	s.tbl.CopyValuesSubset(start, arrow.Float64Traits.CastFromBytes(out))
  2727  }
  2728  
  2729  func (s *Float64MemoTable) WriteOutLE(out []byte) {
  2730  	s.tbl.WriteOut(out)
  2731  }
  2732  
  2733  func (s *Float64MemoTable) WriteOutSubsetLE(start int, out []byte) {
  2734  	s.tbl.WriteOutSubset(start, out)
  2735  }
  2736  
  2737  // Get returns the index of the requested value in the hash table or KeyNotFound
  2738  // along with a boolean indicating if it was found or not.
  2739  func (s *Float64MemoTable) Get(val interface{}) (int, bool) {
  2740  	var cmp func(float64) bool
  2741  	if math.IsNaN(val.(float64)) {
  2742  		cmp = math.IsNaN
  2743  		// use consistent internal bit pattern for NaN regardless of the pattern
  2744  		// that is passed to us. NaN is NaN is NaN
  2745  		val = math.NaN()
  2746  	} else {
  2747  		cmp = func(v float64) bool { return val.(float64) == v }
  2748  	}
  2749  
  2750  	h := hashFloat64(val.(float64), 0)
  2751  	if e, ok := s.tbl.Lookup(h, cmp); ok {
  2752  		return int(e.payload.memoIdx), ok
  2753  	}
  2754  	return KeyNotFound, false
  2755  }
  2756  
  2757  // GetOrInsert will return the index of the specified value in the table, or insert the
  2758  // value into the table and return the new index. found indicates whether or not it already
  2759  // existed in the table (true) or was inserted by this call (false).
  2760  func (s *Float64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
  2761  
  2762  	var cmp func(float64) bool
  2763  	if math.IsNaN(val.(float64)) {
  2764  		cmp = math.IsNaN
  2765  		// use consistent internal bit pattern for NaN regardless of the pattern
  2766  		// that is passed to us. NaN is NaN is NaN
  2767  		val = math.NaN()
  2768  	} else {
  2769  		cmp = func(v float64) bool { return val.(float64) == v }
  2770  	}
  2771  
  2772  	h := hashFloat64(val.(float64), 0)
  2773  	e, ok := s.tbl.Lookup(h, cmp)
  2774  
  2775  	if ok {
  2776  		idx = int(e.payload.memoIdx)
  2777  		found = true
  2778  	} else {
  2779  		idx = s.Size()
  2780  		s.tbl.Insert(e, h, val.(float64), int32(idx))
  2781  	}
  2782  	return
  2783  }