github.com/scottcagno/storage@v1.8.0/pkg/hashmap/openaddr/rhhmap.go

github.com/scottcagno/storage@v1.8.0/pkg/hashmap/openaddr/rhhmap.go (about)

     1  package openaddr
     2  
     3  import "github.com/scottcagno/storage/pkg/hash/murmur3"
     4  
     5  // entry is a key value pair that is found in each bucket
     6  type entry struct {
     7  	key string
     8  	val []byte
     9  }
    10  
    11  // bucket represents a single slot in the HashMap table
    12  type bucket struct {
    13  	dib     uint8
    14  	hashkey uint64
    15  	entry
    16  }
    17  
    18  // checkHashAndKey checks if this bucket matches the specified hashkey and key
    19  func (b *bucket) checkHashAndKey(hashkey uint64, key string) bool {
    20  	return b.hashkey == hashkey && b.entry.key == key
    21  }
    22  
    23  // HashMap represents a closed hashing hashtable implementation
    24  type HashMap struct {
    25  	hash    hashFunc
    26  	mask    uint64
    27  	expand  uint
    28  	shrink  uint
    29  	keys    uint
    30  	size    uint
    31  	buckets []bucket
    32  }
    33  
    34  // defaultHashFunc is the default hashFunc used. This is here mainly as
    35  // a convenience for the sharded hashmap to utilize
    36  func defaultHashFunc(key string) uint64 {
    37  	return murmur3.Sum64([]byte(key))
    38  }
    39  
    40  // hashFunc is a type definition for what a hash function should look like
    41  type hashFunc func(key string) uint64
    42  
    43  // NewHashMap returns a new HashMap instantiated with the specified size or
    44  // the DefaultMapSize, whichever is larger
    45  func NewHashMap(size uint) *HashMap {
    46  	return newHashMap(size, defaultHashFunc)
    47  }
    48  
    49  // newHashMap is the internal variant of the previous function
    50  // and is mainly used internally
    51  func newHashMap(size uint, hash hashFunc) *HashMap {
    52  	bukCnt := alignBucketCount(size)
    53  	if hash == nil {
    54  		hash = defaultHashFunc
    55  	}
    56  	m := &HashMap{
    57  		hash:    hash,
    58  		mask:    bukCnt - 1, // this minus one is extremely important for using a mask over modulo
    59  		expand:  uint(float64(bukCnt) * DefaultLoadFactor),
    60  		shrink:  uint(float64(bukCnt) * (1 - DefaultLoadFactor)),
    61  		keys:    0,
    62  		size:    size,
    63  		buckets: make([]bucket, bukCnt),
    64  	}
    65  	return m
    66  }
    67  
    68  // resize grows or shrinks the HashMap by the newSize provided. It makes a
    69  // new map with the new size, copies everything over, and then frees the old map
    70  func (m *HashMap) resize(newSize uint) {
    71  	newHM := newHashMap(newSize, m.hash)
    72  	var buk bucket
    73  	for i := 0; i < len(m.buckets); i++ {
    74  		buk = m.buckets[i]
    75  		if buk.dib > 0 {
    76  			newHM.insertInternal(buk.hashkey, buk.entry.key, buk.entry.val)
    77  		}
    78  	}
    79  	tsize := m.size
    80  	*m = *newHM
    81  	m.size = tsize
    82  }
    83  
    84  // Get returns a value for a given key, or returns false if none could be found
    85  // Get can be considered the exported version of the lookup call
    86  func (m *HashMap) Get(key string) ([]byte, bool) {
    87  	return m.lookup(0, key)
    88  }
    89  
    90  // lookup returns a value for a given key, or returns false if none could be found
    91  func (m *HashMap) lookup(hashkey uint64, key string) ([]byte, bool) {
    92  	// check if map is empty
    93  	if len(m.buckets) == 0 {
    94  		// hopefully this should never really happen
    95  		// do we really need to check this here?
    96  		*m = *newHashMap(DefaultMapSize, m.hash)
    97  	}
    98  	if hashkey == 0 {
    99  		// calculate the hashkey value
   100  		hashkey = m.hash(key)
   101  	}
   102  	// mask the hashkey to get the initial index
   103  	i := hashkey & m.mask
   104  	// search the position linearly
   105  	for {
   106  		// havent located anything
   107  		if m.buckets[i].dib == 0 {
   108  			return nil, false
   109  		}
   110  		// check for matching hashes and keys
   111  		if m.buckets[i].checkHashAndKey(hashkey, key) {
   112  			return m.buckets[i].entry.val, true
   113  		}
   114  		// keep on probing
   115  		i = (i + 1) & m.mask
   116  	}
   117  }
   118  
   119  // Set inserts a key value entry and returns the previous value or false
   120  // Set can be considered the exported version of the insert call
   121  func (m *HashMap) Set(key string, value []byte) ([]byte, bool) {
   122  	return m.insert(0, key, value)
   123  }
   124  
   125  // insert inserts a key value entry and returns the previous value, or false
   126  func (m *HashMap) insert(hashkey uint64, key string, value []byte) ([]byte, bool) {
   127  	// check if map is empty
   128  	if len(m.buckets) == 0 {
   129  		// create a new map with default size
   130  		*m = *newHashMap(DefaultMapSize, m.hash)
   131  	}
   132  	// check and see if we need to resize
   133  	if m.keys >= m.expand {
   134  		// if we do, then double the map size
   135  		m.resize(uint(len(m.buckets)) * 2)
   136  	}
   137  	if hashkey == 0 {
   138  		// calculate the hashkey value
   139  		hashkey = m.hash(key)
   140  	}
   141  	// call the internal insert to insert the entry
   142  	return m.insertInternal(hashkey, key, value)
   143  }
   144  
   145  // insertInternal inserts a key value entry and returns the previous value, or false
   146  func (m *HashMap) insertInternal(hashkey uint64, key string, value []byte) ([]byte, bool) {
   147  	// create a new entry to insert
   148  	newb := bucket{
   149  		dib:     1,
   150  		hashkey: hashkey,
   151  		entry: entry{
   152  			key: key,
   153  			val: value,
   154  		},
   155  	}
   156  	// mask the hashkey to get the initial index
   157  	i := newb.hashkey & m.mask
   158  	// search the position linearly
   159  	for {
   160  		// we found a spot, insert a new entry
   161  		if m.buckets[i].dib == 0 {
   162  			m.buckets[i] = newb
   163  			m.keys++
   164  			// no previous value to return, as this is a new entry
   165  			return nil, false
   166  		}
   167  		// found existing entry, check hashes and keys
   168  		if m.buckets[i].checkHashAndKey(newb.hashkey, newb.entry.key) {
   169  			// hashes and keys are a match--update entry and return previous values
   170  			oldval := m.buckets[i].entry.val
   171  			m.buckets[i].val = newb.entry.val
   172  			return oldval, true
   173  		}
   174  		// we did not find an empty slot or an existing matching entry
   175  		// so check this entries dib against our new entry's dib
   176  		if m.buckets[i].dib < newb.dib {
   177  			// current position's dib is less than our new entry's, swap
   178  			newb, m.buckets[i] = m.buckets[i], newb
   179  		}
   180  		// keep on probing until we find what we're looking for.
   181  		// increase our search index by one as well as our new
   182  		// entry's dib, then continue with the linear probe.
   183  		i = (i + 1) & m.mask
   184  		newb.dib = newb.dib + 1
   185  	}
   186  }
   187  
   188  // Del removes a value for a given key and returns the deleted value, or false
   189  // Del can be considered the exported version of the delete call
   190  func (m *HashMap) Del(key string) ([]byte, bool) {
   191  	return m.delete(0, key)
   192  }
   193  
   194  // delete removes a value for a given key and returns the deleted value, or false
   195  func (m *HashMap) delete(hashkey uint64, key string) ([]byte, bool) {
   196  	// check if map is empty
   197  	if len(m.buckets) == 0 {
   198  		// nothing to see here folks
   199  		return nil, false
   200  	}
   201  	if hashkey == 0 {
   202  		// calculate the hashkey value
   203  		hashkey = m.hash(key)
   204  	}
   205  	// mask the hashkey to get the initial index
   206  	i := hashkey & m.mask
   207  	// search the position linearly
   208  	for {
   209  		// havent located anything
   210  		if m.buckets[i].dib == 0 {
   211  			return nil, false
   212  		}
   213  		// found existing entry, check hashes and keys
   214  		if m.buckets[i].checkHashAndKey(hashkey, key) {
   215  			// hashes and keys are a match--delete entry and return previous values
   216  			oldval := m.buckets[i].entry.val
   217  			m.deleteInternal(i)
   218  			return oldval, true
   219  		}
   220  		// keep on probing until we find what we're looking for.
   221  		// increase our search index by one as well as our new
   222  		// entry's dib, then continue with the linear probe.
   223  		i = (i + 1) & m.mask
   224  	}
   225  }
   226  
   227  // delete removes a value for a given key and returns the deleted value, or false
   228  func (m *HashMap) deleteInternal(i uint64) {
   229  	// set dib at bucket i
   230  	m.buckets[i].dib = 0
   231  	// tombstone index and shift
   232  	for {
   233  		pi := i
   234  		i = (i + 1) & m.mask
   235  		if m.buckets[i].dib <= 1 {
   236  			// im as free as a bird now!
   237  			m.buckets[pi].entry = *new(entry)
   238  			m.buckets[pi] = *new(bucket)
   239  			break
   240  		}
   241  		// shift
   242  		m.buckets[pi] = m.buckets[i]
   243  		m.buckets[pi].dib = m.buckets[pi].dib - 1
   244  	}
   245  	// decrement entry count
   246  	m.keys--
   247  	// check and see if we need to resize
   248  	if m.keys <= m.shrink && uint(len(m.buckets)) > m.size {
   249  		// if it checks out, then resize down by 25%-ish
   250  		m.resize(m.keys)
   251  	}
   252  }
   253  
   254  // Iterator is an iterator function type
   255  type Iterator func(key string, value []byte) bool
   256  
   257  // Range takes an Iterator and ranges the HashMap as long as long
   258  // as the iterator function continues to be true. Range is not
   259  // safe to perform an insert or remove operation while ranging!
   260  func (m *HashMap) Range(it Iterator) {
   261  	for i := 0; i < len(m.buckets); i++ {
   262  		if m.buckets[i].dib < 1 {
   263  			continue
   264  		}
   265  		if !it(m.buckets[i].key, m.buckets[i].val) {
   266  			return
   267  		}
   268  	}
   269  }
   270  
   271  // GetHighestDIB returns the highest distance to initial bucket value in the table
   272  func (m *HashMap) GetHighestDIB() uint8 {
   273  	var hdib uint8
   274  	for i := 0; i < len(m.buckets); i++ {
   275  		if m.buckets[i].dib > hdib {
   276  			hdib = m.buckets[i].dib
   277  		}
   278  	}
   279  	return hdib
   280  }
   281  
   282  // PercentFull returns the current load factor of the HashMap
   283  func (m *HashMap) PercentFull() float64 {
   284  	return float64(m.keys) / float64(len(m.buckets))
   285  }
   286  
   287  // Len returns the number of entries currently in the HashMap
   288  func (m *HashMap) Len() int {
   289  	return int(m.keys)
   290  }
   291  
   292  // Close closes and frees the current hashmap. Calling any method
   293  // on the HashMap after this will most likely result in a panic
   294  func (m *HashMap) Close() {
   295  	destroyMap(m)
   296  }
   297  
   298  // destroy does exactly what is sounds like it does
   299  func destroyMap(m *HashMap) {
   300  	m = nil
   301  }