github.com/scottcagno/storage@v1.8.0/pkg/hashmap/openaddr/rhhmap_gp.go (about)

     1  package openaddr
     2  
     3  import (
     4  	"github.com/scottcagno/storage/pkg/hash/murmur3"
     5  	"reflect"
     6  	"unsafe"
     7  )
     8  
// entryGP is a single key/value pair. It is embedded in bucketGP, so its
// fields are promoted onto every bucket.
type entryGP struct {
	// key is the string the entry is stored under.
	key string
	// val is the value associated with key.
	val interface{}
}
    14  
// bucketGP represents a single slot in the HashMapGP table.
type bucketGP struct {
	// dib is the entry's distance to its initial bucket, counted from 1
	// (an entry sitting in its initial bucket has dib 1). A dib of 0 marks
	// the slot as empty.
	dib     uint8
	// hashkey is the full, unmasked hash of the entry's key.
	hashkey uint64
	// entryGP holds the key and value stored in this slot.
	entryGP
}
    21  
    22  // checkHashAndKey checks if this bucketGP matches the specified hashkey and key
    23  func (b *bucketGP) checkHashAndKey(hashkey uint64, key string) bool {
    24  	return b.hashkey == hashkey && b.entryGP.key == key
    25  }
    26  
// HashMapGP represents a closed hashing (open addressing) hashtable
// implementation that maps string keys to interface{} values.
type HashMapGP struct {
	// hash computes the 64-bit hash of a key.
	hash    hashFuncGP
	// mask is the bucket count minus one; hashes are AND-ed with it to
	// obtain a bucket index.
	mask    uint64
	// expand is the key count at or above which insert doubles the table.
	expand  uint
	// shrink is the key count at or below which a delete may shrink the table.
	shrink  uint
	// keys is the number of entries currently stored.
	keys    uint
	// size is the size the map was created with. It is preserved across
	// resizes and the table is only shrunk while it has more buckets than this.
	size    uint
	// buckets is the slot array that is probed linearly.
	buckets []bucketGP
}
    37  
    38  // defaultHashFunc is the default hashFunc used. This is here mainly as
    39  // a convenience for the sharded hashmap to utilize
    40  func defaultHashFuncGP(key string) uint64 {
    41  	return murmur3.Sum64(*(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
    42  		Data: uintptr(unsafe.Pointer(&key)),
    43  		Len:  8,
    44  		Cap:  8,
    45  	})))
    46  }
    47  
// hashFuncGP is the signature a hash function must have to be used by a
// HashMapGP: it takes a key and returns its 64-bit hash.
type hashFuncGP func(key string) uint64
    50  
    51  // NewHashMapGP returns a new HashMapGP instantiated with the specified size or
    52  // the DefaultMapSize, whichever is larger
    53  func NewHashMapGP(size uint) *HashMapGP {
    54  	return newHashMapGP(size, defaultHashFuncGP)
    55  }
    56  
    57  // newHashMap is the internal variant of the previous function
    58  // and is mainly used internally
    59  func newHashMapGP(size uint, hash hashFuncGP) *HashMapGP {
    60  	bukCnt := alignBucketCount(size)
    61  	if hash == nil {
    62  		hash = defaultHashFuncGP
    63  	}
    64  	m := &HashMapGP{
    65  		hash:    hash,
    66  		mask:    bukCnt - 1, // this minus one is extremely important for using a mask over modulo
    67  		expand:  uint(float64(bukCnt) * DefaultLoadFactor),
    68  		shrink:  uint(float64(bukCnt) * (1 - DefaultLoadFactor)),
    69  		keys:    0,
    70  		size:    size,
    71  		buckets: make([]bucketGP, bukCnt),
    72  	}
    73  	return m
    74  }
    75  
    76  // resize grows or shrinks the HashMapGP by the newSize provided. It makes a
    77  // new map with the new size, copies everything over, and then frees the old map
    78  func (m *HashMapGP) resize(newSize uint) {
    79  	newHM := newHashMapGP(newSize, m.hash)
    80  	var buk bucketGP
    81  	for i := 0; i < len(m.buckets); i++ {
    82  		buk = m.buckets[i]
    83  		if buk.dib > 0 {
    84  			newHM.insertInternal(buk.hashkey, buk.entryGP.key, buk.entryGP.val)
    85  		}
    86  	}
    87  	tsize := m.size
    88  	*m = *newHM
    89  	m.size = tsize
    90  }
    91  
    92  // Get returns a value for a given key, or returns false if none could be found
    93  // Get can be considered the exported version of the lookup call
    94  func (m *HashMapGP) Get(key string) (interface{}, bool) {
    95  	return m.lookup(0, key)
    96  }
    97  
    98  // lookup returns a value for a given key, or returns false if none could be found
    99  func (m *HashMapGP) lookup(hashkey uint64, key string) (interface{}, bool) {
   100  	// check if map is empty
   101  	if len(m.buckets) == 0 {
   102  		// hopefully this should never really happen
   103  		// do we really need to check this here?
   104  		*m = *newHashMapGP(DefaultMapSize, m.hash)
   105  	}
   106  	if hashkey == 0 {
   107  		// calculate the hashkey value
   108  		hashkey = m.hash(key)
   109  	}
   110  	// mask the hashkey to get the initial index
   111  	i := hashkey & m.mask
   112  	// search the position linearly
   113  	for {
   114  		// havent located anything
   115  		if m.buckets[i].dib == 0 {
   116  			return 0, false
   117  		}
   118  		// check for matching hashes and keys
   119  		if m.buckets[i].checkHashAndKey(hashkey, key) {
   120  			return m.buckets[i].entryGP.val, true
   121  		}
   122  		// keep on probing
   123  		i = (i + 1) & m.mask
   124  	}
   125  }
   126  
   127  // Set inserts a key value entryGP and returns the previous value or false
   128  // Set can be considered the exported version of the insert call
   129  func (m *HashMapGP) Set(key string, value interface{}) (interface{}, bool) {
   130  	return m.insert(0, key, value)
   131  }
   132  
   133  // insert inserts a key value entryGP and returns the previous value, or false
   134  func (m *HashMapGP) insert(hashkey uint64, key string, value interface{}) (interface{}, bool) {
   135  	// check if map is empty
   136  	if len(m.buckets) == 0 {
   137  		// create a new map with default size
   138  		*m = *newHashMapGP(DefaultMapSize, m.hash)
   139  	}
   140  	// check and see if we need to resize
   141  	if m.keys >= m.expand {
   142  		// if we do, then double the map size
   143  		m.resize(uint(len(m.buckets)) * 2)
   144  	}
   145  	if hashkey == 0 {
   146  		// calculate the hashkey value
   147  		hashkey = m.hash(key)
   148  	}
   149  	// call the internal insert to insert the entryGP
   150  	return m.insertInternal(hashkey, key, value)
   151  }
   152  
   153  // insertInternal inserts a key value entryGP and returns the previous value, or false
   154  func (m *HashMapGP) insertInternal(hashkey uint64, key string, value interface{}) (interface{}, bool) {
   155  	// create a new entryGP to insert
   156  	newb := bucketGP{
   157  		dib:     1,
   158  		hashkey: hashkey,
   159  		entryGP: entryGP{
   160  			key: key,
   161  			val: value,
   162  		},
   163  	}
   164  	// mask the hashkey to get the initial index
   165  	i := newb.hashkey & m.mask
   166  	// search the position linearly
   167  	for {
   168  		// we found a spot, insert a new entryGP
   169  		if m.buckets[i].dib == 0 {
   170  			m.buckets[i] = newb
   171  			m.keys++
   172  			// no previous value to return, as this is a new entryGP
   173  			return 0, false
   174  		}
   175  		// found existing entryGP, check hashes and keys
   176  		if m.buckets[i].checkHashAndKey(newb.hashkey, newb.entryGP.key) {
   177  			// hashes and keys are a match--update entryGP and return previous values
   178  			oldval := m.buckets[i].entryGP.val
   179  			m.buckets[i].val = newb.entryGP.val
   180  			return oldval, true
   181  		}
   182  		// we did not find an empty slot or an existing matching entryGP
   183  		// so check this entries dib against our new entryGP's dib
   184  		if m.buckets[i].dib < newb.dib {
   185  			// current position's dib is less than our new entryGP's, swap
   186  			newb, m.buckets[i] = m.buckets[i], newb
   187  		}
   188  		// keep on probing until we find what we're looking for.
   189  		// increase our search index by one as well as our new
   190  		// entryGP's dib, then continue with the linear probe.
   191  		i = (i + 1) & m.mask
   192  		newb.dib = newb.dib + 1
   193  	}
   194  }
   195  
   196  // Del removes a value for a given key and returns the deleted value, or false
   197  // Del can be considered the exported version of the delete call
   198  func (m *HashMapGP) Del(key string) (interface{}, bool) {
   199  	return m.delete(0, key)
   200  }
   201  
   202  // delete removes a value for a given key and returns the deleted value, or false
   203  func (m *HashMapGP) delete(hashkey uint64, key string) (interface{}, bool) {
   204  	// check if map is empty
   205  	if len(m.buckets) == 0 {
   206  		// nothing to see here folks
   207  		return 0, false
   208  	}
   209  	if hashkey == 0 {
   210  		// calculate the hashkey value
   211  		hashkey = m.hash(key)
   212  	}
   213  	// mask the hashkey to get the initial index
   214  	i := hashkey & m.mask
   215  	// search the position linearly
   216  	for {
   217  		// havent located anything
   218  		if m.buckets[i].dib == 0 {
   219  			return 0, false
   220  		}
   221  		// found existing entryGP, check hashes and keys
   222  		if m.buckets[i].checkHashAndKey(hashkey, key) {
   223  			// hashes and keys are a match--delete entryGP and return previous values
   224  			oldval := m.buckets[i].entryGP.val
   225  			m.deleteInternal(i)
   226  			return oldval, true
   227  		}
   228  		// keep on probing until we find what we're looking for.
   229  		// increase our search index by one as well as our new
   230  		// entryGP's dib, then continue with the linear probe.
   231  		i = (i + 1) & m.mask
   232  	}
   233  }
   234  
   235  // delete removes a value for a given key and returns the deleted value, or false
   236  func (m *HashMapGP) deleteInternal(i uint64) {
   237  	// set dib at bucketGP i
   238  	m.buckets[i].dib = 0
   239  	// tombstone index and shift
   240  	for {
   241  		pi := i
   242  		i = (i + 1) & m.mask
   243  		if m.buckets[i].dib <= 1 {
   244  			// im as free as a bird now!
   245  			m.buckets[pi].entryGP = *new(entryGP)
   246  			m.buckets[pi] = *new(bucketGP)
   247  			break
   248  		}
   249  		// shift
   250  		m.buckets[pi] = m.buckets[i]
   251  		m.buckets[pi].dib = m.buckets[pi].dib - 1
   252  	}
   253  	// decrement entryGP count
   254  	m.keys--
   255  	// check and see if we need to resize
   256  	if m.keys <= m.shrink && uint(len(m.buckets)) > m.size {
   257  		// if it checks out, then resize down by 25%-ish
   258  		m.resize(m.keys)
   259  	}
   260  }
   261  
// IteratorGP is the callback type accepted by Range. It is called with the
// key and value of an entry; returning false stops the iteration.
type IteratorGP func(key string, value interface{}) bool
   264  
   265  // Range takes an Iterator and ranges the HashMapGP as long as long
   266  // as the iterator function continues to be true. Range is not
   267  // safe to perform an insert or remove operation while ranging!
   268  func (m *HashMapGP) Range(it IteratorGP) {
   269  	for i := 0; i < len(m.buckets); i++ {
   270  		if m.buckets[i].dib < 1 {
   271  			continue
   272  		}
   273  		if !it(m.buckets[i].key, m.buckets[i].val) {
   274  			return
   275  		}
   276  	}
   277  }
   278  
   279  // GetHighestDIB returns the highest distance to initial bucketGP value in the table
   280  func (m *HashMapGP) GetHighestDIB() uint8 {
   281  	var hdib uint8
   282  	for i := 0; i < len(m.buckets); i++ {
   283  		if m.buckets[i].dib > hdib {
   284  			hdib = m.buckets[i].dib
   285  		}
   286  	}
   287  	return hdib
   288  }
   289  
   290  // PercentFull returns the current load factor of the HashMapGP
   291  func (m *HashMapGP) PercentFull() float64 {
   292  	return float64(m.keys) / float64(len(m.buckets))
   293  }
   294  
   295  // Len returns the number of entries currently in the HashMapGP
   296  func (m *HashMapGP) Len() int {
   297  	return int(m.keys)
   298  }
   299  
   300  // Close closes and frees the current hashmap. Calling any method
   301  // on the HashMapGP after this will most likely result in a panic
   302  func (m *HashMapGP) Close() {
   303  	destroyHashMapGP(m)
   304  }
   305  
   306  // destroy does exactly what is sounds like it does
   307  func destroyHashMapGP(m *HashMapGP) {
   308  	m = nil
   309  }