github.com/scottcagno/storage@v1.8.0/pkg/hashmap/openaddr/rhhmap.go (about) 1 package openaddr 2 3 import "github.com/scottcagno/storage/pkg/hash/murmur3" 4 5 // entry is a key value pair that is found in each bucket 6 type entry struct { 7 key string 8 val []byte 9 } 10 11 // bucket represents a single slot in the HashMap table 12 type bucket struct { 13 dib uint8 14 hashkey uint64 15 entry 16 } 17 18 // checkHashAndKey checks if this bucket matches the specified hashkey and key 19 func (b *bucket) checkHashAndKey(hashkey uint64, key string) bool { 20 return b.hashkey == hashkey && b.entry.key == key 21 } 22 23 // HashMap represents a closed hashing hashtable implementation 24 type HashMap struct { 25 hash hashFunc 26 mask uint64 27 expand uint 28 shrink uint 29 keys uint 30 size uint 31 buckets []bucket 32 } 33 34 // defaultHashFunc is the default hashFunc used. This is here mainly as 35 // a convenience for the sharded hashmap to utilize 36 func defaultHashFunc(key string) uint64 { 37 return murmur3.Sum64([]byte(key)) 38 } 39 40 // hashFunc is a type definition for what a hash function should look like 41 type hashFunc func(key string) uint64 42 43 // NewHashMap returns a new HashMap instantiated with the specified size or 44 // the DefaultMapSize, whichever is larger 45 func NewHashMap(size uint) *HashMap { 46 return newHashMap(size, defaultHashFunc) 47 } 48 49 // newHashMap is the internal variant of the previous function 50 // and is mainly used internally 51 func newHashMap(size uint, hash hashFunc) *HashMap { 52 bukCnt := alignBucketCount(size) 53 if hash == nil { 54 hash = defaultHashFunc 55 } 56 m := &HashMap{ 57 hash: hash, 58 mask: bukCnt - 1, // this minus one is extremely important for using a mask over modulo 59 expand: uint(float64(bukCnt) * DefaultLoadFactor), 60 shrink: uint(float64(bukCnt) * (1 - DefaultLoadFactor)), 61 keys: 0, 62 size: size, 63 buckets: make([]bucket, bukCnt), 64 } 65 return m 66 } 67 68 // resize grows or shrinks the HashMap by the newSize provided. It makes a 69 // new map with the new size, copies everything over, and then frees the old map 70 func (m *HashMap) resize(newSize uint) { 71 newHM := newHashMap(newSize, m.hash) 72 var buk bucket 73 for i := 0; i < len(m.buckets); i++ { 74 buk = m.buckets[i] 75 if buk.dib > 0 { 76 newHM.insertInternal(buk.hashkey, buk.entry.key, buk.entry.val) 77 } 78 } 79 tsize := m.size 80 *m = *newHM 81 m.size = tsize 82 } 83 84 // Get returns a value for a given key, or returns false if none could be found 85 // Get can be considered the exported version of the lookup call 86 func (m *HashMap) Get(key string) ([]byte, bool) { 87 return m.lookup(0, key) 88 } 89 90 // lookup returns a value for a given key, or returns false if none could be found 91 func (m *HashMap) lookup(hashkey uint64, key string) ([]byte, bool) { 92 // check if map is empty 93 if len(m.buckets) == 0 { 94 // hopefully this should never really happen 95 // do we really need to check this here? 96 *m = *newHashMap(DefaultMapSize, m.hash) 97 } 98 if hashkey == 0 { 99 // calculate the hashkey value 100 hashkey = m.hash(key) 101 } 102 // mask the hashkey to get the initial index 103 i := hashkey & m.mask 104 // search the position linearly 105 for { 106 // havent located anything 107 if m.buckets[i].dib == 0 { 108 return nil, false 109 } 110 // check for matching hashes and keys 111 if m.buckets[i].checkHashAndKey(hashkey, key) { 112 return m.buckets[i].entry.val, true 113 } 114 // keep on probing 115 i = (i + 1) & m.mask 116 } 117 } 118 119 // Set inserts a key value entry and returns the previous value or false 120 // Set can be considered the exported version of the insert call 121 func (m *HashMap) Set(key string, value []byte) ([]byte, bool) { 122 return m.insert(0, key, value) 123 } 124 125 // insert inserts a key value entry and returns the previous value, or false 126 func (m *HashMap) insert(hashkey uint64, key string, value []byte) ([]byte, bool) { 127 // check if map is empty 128 if len(m.buckets) == 0 { 129 // create a new map with default size 130 *m = *newHashMap(DefaultMapSize, m.hash) 131 } 132 // check and see if we need to resize 133 if m.keys >= m.expand { 134 // if we do, then double the map size 135 m.resize(uint(len(m.buckets)) * 2) 136 } 137 if hashkey == 0 { 138 // calculate the hashkey value 139 hashkey = m.hash(key) 140 } 141 // call the internal insert to insert the entry 142 return m.insertInternal(hashkey, key, value) 143 } 144 145 // insertInternal inserts a key value entry and returns the previous value, or false 146 func (m *HashMap) insertInternal(hashkey uint64, key string, value []byte) ([]byte, bool) { 147 // create a new entry to insert 148 newb := bucket{ 149 dib: 1, 150 hashkey: hashkey, 151 entry: entry{ 152 key: key, 153 val: value, 154 }, 155 } 156 // mask the hashkey to get the initial index 157 i := newb.hashkey & m.mask 158 // search the position linearly 159 for { 160 // we found a spot, insert a new entry 161 if m.buckets[i].dib == 0 { 162 m.buckets[i] = newb 163 m.keys++ 164 // no previous value to return, as this is a new entry 165 return nil, false 166 } 167 // found existing entry, check hashes and keys 168 if m.buckets[i].checkHashAndKey(newb.hashkey, newb.entry.key) { 169 // hashes and keys are a match--update entry and return previous values 170 oldval := m.buckets[i].entry.val 171 m.buckets[i].val = newb.entry.val 172 return oldval, true 173 } 174 // we did not find an empty slot or an existing matching entry 175 // so check this entries dib against our new entry's dib 176 if m.buckets[i].dib < newb.dib { 177 // current position's dib is less than our new entry's, swap 178 newb, m.buckets[i] = m.buckets[i], newb 179 } 180 // keep on probing until we find what we're looking for. 181 // increase our search index by one as well as our new 182 // entry's dib, then continue with the linear probe. 183 i = (i + 1) & m.mask 184 newb.dib = newb.dib + 1 185 } 186 } 187 188 // Del removes a value for a given key and returns the deleted value, or false 189 // Del can be considered the exported version of the delete call 190 func (m *HashMap) Del(key string) ([]byte, bool) { 191 return m.delete(0, key) 192 } 193 194 // delete removes a value for a given key and returns the deleted value, or false 195 func (m *HashMap) delete(hashkey uint64, key string) ([]byte, bool) { 196 // check if map is empty 197 if len(m.buckets) == 0 { 198 // nothing to see here folks 199 return nil, false 200 } 201 if hashkey == 0 { 202 // calculate the hashkey value 203 hashkey = m.hash(key) 204 } 205 // mask the hashkey to get the initial index 206 i := hashkey & m.mask 207 // search the position linearly 208 for { 209 // havent located anything 210 if m.buckets[i].dib == 0 { 211 return nil, false 212 } 213 // found existing entry, check hashes and keys 214 if m.buckets[i].checkHashAndKey(hashkey, key) { 215 // hashes and keys are a match--delete entry and return previous values 216 oldval := m.buckets[i].entry.val 217 m.deleteInternal(i) 218 return oldval, true 219 } 220 // keep on probing until we find what we're looking for. 221 // increase our search index by one as well as our new 222 // entry's dib, then continue with the linear probe. 223 i = (i + 1) & m.mask 224 } 225 } 226 227 // delete removes a value for a given key and returns the deleted value, or false 228 func (m *HashMap) deleteInternal(i uint64) { 229 // set dib at bucket i 230 m.buckets[i].dib = 0 231 // tombstone index and shift 232 for { 233 pi := i 234 i = (i + 1) & m.mask 235 if m.buckets[i].dib <= 1 { 236 // im as free as a bird now! 237 m.buckets[pi].entry = *new(entry) 238 m.buckets[pi] = *new(bucket) 239 break 240 } 241 // shift 242 m.buckets[pi] = m.buckets[i] 243 m.buckets[pi].dib = m.buckets[pi].dib - 1 244 } 245 // decrement entry count 246 m.keys-- 247 // check and see if we need to resize 248 if m.keys <= m.shrink && uint(len(m.buckets)) > m.size { 249 // if it checks out, then resize down by 25%-ish 250 m.resize(m.keys) 251 } 252 } 253 254 // Iterator is an iterator function type 255 type Iterator func(key string, value []byte) bool 256 257 // Range takes an Iterator and ranges the HashMap as long as long 258 // as the iterator function continues to be true. Range is not 259 // safe to perform an insert or remove operation while ranging! 260 func (m *HashMap) Range(it Iterator) { 261 for i := 0; i < len(m.buckets); i++ { 262 if m.buckets[i].dib < 1 { 263 continue 264 } 265 if !it(m.buckets[i].key, m.buckets[i].val) { 266 return 267 } 268 } 269 } 270 271 // GetHighestDIB returns the highest distance to initial bucket value in the table 272 func (m *HashMap) GetHighestDIB() uint8 { 273 var hdib uint8 274 for i := 0; i < len(m.buckets); i++ { 275 if m.buckets[i].dib > hdib { 276 hdib = m.buckets[i].dib 277 } 278 } 279 return hdib 280 } 281 282 // PercentFull returns the current load factor of the HashMap 283 func (m *HashMap) PercentFull() float64 { 284 return float64(m.keys) / float64(len(m.buckets)) 285 } 286 287 // Len returns the number of entries currently in the HashMap 288 func (m *HashMap) Len() int { 289 return int(m.keys) 290 } 291 292 // Close closes and frees the current hashmap. Calling any method 293 // on the HashMap after this will most likely result in a panic 294 func (m *HashMap) Close() { 295 destroyMap(m) 296 } 297 298 // destroy does exactly what is sounds like it does 299 func destroyMap(m *HashMap) { 300 m = nil 301 }