github.com/scottcagno/storage@v1.8.0/pkg/hashmap/openaddr/rhhmap_gp.go (about) 1 package openaddr 2 3 import ( 4 "github.com/scottcagno/storage/pkg/hash/murmur3" 5 "reflect" 6 "unsafe" 7 ) 8 9 // entryGP is a key value pair that is found in each bucketGP 10 type entryGP struct { 11 key string 12 val interface{} 13 } 14 15 // bucketGP represents a single slot in the HashMapGP table 16 type bucketGP struct { 17 dib uint8 18 hashkey uint64 19 entryGP 20 } 21 22 // checkHashAndKey checks if this bucketGP matches the specified hashkey and key 23 func (b *bucketGP) checkHashAndKey(hashkey uint64, key string) bool { 24 return b.hashkey == hashkey && b.entryGP.key == key 25 } 26 27 // HashMapGP represents a closed hashing hashtable implementation 28 type HashMapGP struct { 29 hash hashFuncGP 30 mask uint64 31 expand uint 32 shrink uint 33 keys uint 34 size uint 35 buckets []bucketGP 36 } 37 38 // defaultHashFunc is the default hashFunc used. This is here mainly as 39 // a convenience for the sharded hashmap to utilize 40 func defaultHashFuncGP(key string) uint64 { 41 return murmur3.Sum64(*(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ 42 Data: uintptr(unsafe.Pointer(&key)), 43 Len: 8, 44 Cap: 8, 45 }))) 46 } 47 48 // hashFunc is a type definition for what a hash function should look like 49 type hashFuncGP func(key string) uint64 50 51 // NewHashMapGP returns a new HashMapGP instantiated with the specified size or 52 // the DefaultMapSize, whichever is larger 53 func NewHashMapGP(size uint) *HashMapGP { 54 return newHashMapGP(size, defaultHashFuncGP) 55 } 56 57 // newHashMap is the internal variant of the previous function 58 // and is mainly used internally 59 func newHashMapGP(size uint, hash hashFuncGP) *HashMapGP { 60 bukCnt := alignBucketCount(size) 61 if hash == nil { 62 hash = defaultHashFuncGP 63 } 64 m := &HashMapGP{ 65 hash: hash, 66 mask: bukCnt - 1, // this minus one is extremely important for using a mask over modulo 67 expand: uint(float64(bukCnt) * DefaultLoadFactor), 68 shrink: uint(float64(bukCnt) * (1 - DefaultLoadFactor)), 69 keys: 0, 70 size: size, 71 buckets: make([]bucketGP, bukCnt), 72 } 73 return m 74 } 75 76 // resize grows or shrinks the HashMapGP by the newSize provided. It makes a 77 // new map with the new size, copies everything over, and then frees the old map 78 func (m *HashMapGP) resize(newSize uint) { 79 newHM := newHashMapGP(newSize, m.hash) 80 var buk bucketGP 81 for i := 0; i < len(m.buckets); i++ { 82 buk = m.buckets[i] 83 if buk.dib > 0 { 84 newHM.insertInternal(buk.hashkey, buk.entryGP.key, buk.entryGP.val) 85 } 86 } 87 tsize := m.size 88 *m = *newHM 89 m.size = tsize 90 } 91 92 // Get returns a value for a given key, or returns false if none could be found 93 // Get can be considered the exported version of the lookup call 94 func (m *HashMapGP) Get(key string) (interface{}, bool) { 95 return m.lookup(0, key) 96 } 97 98 // lookup returns a value for a given key, or returns false if none could be found 99 func (m *HashMapGP) lookup(hashkey uint64, key string) (interface{}, bool) { 100 // check if map is empty 101 if len(m.buckets) == 0 { 102 // hopefully this should never really happen 103 // do we really need to check this here? 104 *m = *newHashMapGP(DefaultMapSize, m.hash) 105 } 106 if hashkey == 0 { 107 // calculate the hashkey value 108 hashkey = m.hash(key) 109 } 110 // mask the hashkey to get the initial index 111 i := hashkey & m.mask 112 // search the position linearly 113 for { 114 // havent located anything 115 if m.buckets[i].dib == 0 { 116 return 0, false 117 } 118 // check for matching hashes and keys 119 if m.buckets[i].checkHashAndKey(hashkey, key) { 120 return m.buckets[i].entryGP.val, true 121 } 122 // keep on probing 123 i = (i + 1) & m.mask 124 } 125 } 126 127 // Set inserts a key value entryGP and returns the previous value or false 128 // Set can be considered the exported version of the insert call 129 func (m *HashMapGP) Set(key string, value interface{}) (interface{}, bool) { 130 return m.insert(0, key, value) 131 } 132 133 // insert inserts a key value entryGP and returns the previous value, or false 134 func (m *HashMapGP) insert(hashkey uint64, key string, value interface{}) (interface{}, bool) { 135 // check if map is empty 136 if len(m.buckets) == 0 { 137 // create a new map with default size 138 *m = *newHashMapGP(DefaultMapSize, m.hash) 139 } 140 // check and see if we need to resize 141 if m.keys >= m.expand { 142 // if we do, then double the map size 143 m.resize(uint(len(m.buckets)) * 2) 144 } 145 if hashkey == 0 { 146 // calculate the hashkey value 147 hashkey = m.hash(key) 148 } 149 // call the internal insert to insert the entryGP 150 return m.insertInternal(hashkey, key, value) 151 } 152 153 // insertInternal inserts a key value entryGP and returns the previous value, or false 154 func (m *HashMapGP) insertInternal(hashkey uint64, key string, value interface{}) (interface{}, bool) { 155 // create a new entryGP to insert 156 newb := bucketGP{ 157 dib: 1, 158 hashkey: hashkey, 159 entryGP: entryGP{ 160 key: key, 161 val: value, 162 }, 163 } 164 // mask the hashkey to get the initial index 165 i := newb.hashkey & m.mask 166 // search the position linearly 167 for { 168 // we found a spot, insert a new entryGP 169 if m.buckets[i].dib == 0 { 170 m.buckets[i] = newb 171 m.keys++ 172 // no previous value to return, as this is a new entryGP 173 return 0, false 174 } 175 // found existing entryGP, check hashes and keys 176 if m.buckets[i].checkHashAndKey(newb.hashkey, newb.entryGP.key) { 177 // hashes and keys are a match--update entryGP and return previous values 178 oldval := m.buckets[i].entryGP.val 179 m.buckets[i].val = newb.entryGP.val 180 return oldval, true 181 } 182 // we did not find an empty slot or an existing matching entryGP 183 // so check this entries dib against our new entryGP's dib 184 if m.buckets[i].dib < newb.dib { 185 // current position's dib is less than our new entryGP's, swap 186 newb, m.buckets[i] = m.buckets[i], newb 187 } 188 // keep on probing until we find what we're looking for. 189 // increase our search index by one as well as our new 190 // entryGP's dib, then continue with the linear probe. 191 i = (i + 1) & m.mask 192 newb.dib = newb.dib + 1 193 } 194 } 195 196 // Del removes a value for a given key and returns the deleted value, or false 197 // Del can be considered the exported version of the delete call 198 func (m *HashMapGP) Del(key string) (interface{}, bool) { 199 return m.delete(0, key) 200 } 201 202 // delete removes a value for a given key and returns the deleted value, or false 203 func (m *HashMapGP) delete(hashkey uint64, key string) (interface{}, bool) { 204 // check if map is empty 205 if len(m.buckets) == 0 { 206 // nothing to see here folks 207 return 0, false 208 } 209 if hashkey == 0 { 210 // calculate the hashkey value 211 hashkey = m.hash(key) 212 } 213 // mask the hashkey to get the initial index 214 i := hashkey & m.mask 215 // search the position linearly 216 for { 217 // havent located anything 218 if m.buckets[i].dib == 0 { 219 return 0, false 220 } 221 // found existing entryGP, check hashes and keys 222 if m.buckets[i].checkHashAndKey(hashkey, key) { 223 // hashes and keys are a match--delete entryGP and return previous values 224 oldval := m.buckets[i].entryGP.val 225 m.deleteInternal(i) 226 return oldval, true 227 } 228 // keep on probing until we find what we're looking for. 229 // increase our search index by one as well as our new 230 // entryGP's dib, then continue with the linear probe. 231 i = (i + 1) & m.mask 232 } 233 } 234 235 // delete removes a value for a given key and returns the deleted value, or false 236 func (m *HashMapGP) deleteInternal(i uint64) { 237 // set dib at bucketGP i 238 m.buckets[i].dib = 0 239 // tombstone index and shift 240 for { 241 pi := i 242 i = (i + 1) & m.mask 243 if m.buckets[i].dib <= 1 { 244 // im as free as a bird now! 245 m.buckets[pi].entryGP = *new(entryGP) 246 m.buckets[pi] = *new(bucketGP) 247 break 248 } 249 // shift 250 m.buckets[pi] = m.buckets[i] 251 m.buckets[pi].dib = m.buckets[pi].dib - 1 252 } 253 // decrement entryGP count 254 m.keys-- 255 // check and see if we need to resize 256 if m.keys <= m.shrink && uint(len(m.buckets)) > m.size { 257 // if it checks out, then resize down by 25%-ish 258 m.resize(m.keys) 259 } 260 } 261 262 // IteratorGP is an iterator function type 263 type IteratorGP func(key string, value interface{}) bool 264 265 // Range takes an Iterator and ranges the HashMapGP as long as long 266 // as the iterator function continues to be true. Range is not 267 // safe to perform an insert or remove operation while ranging! 268 func (m *HashMapGP) Range(it IteratorGP) { 269 for i := 0; i < len(m.buckets); i++ { 270 if m.buckets[i].dib < 1 { 271 continue 272 } 273 if !it(m.buckets[i].key, m.buckets[i].val) { 274 return 275 } 276 } 277 } 278 279 // GetHighestDIB returns the highest distance to initial bucketGP value in the table 280 func (m *HashMapGP) GetHighestDIB() uint8 { 281 var hdib uint8 282 for i := 0; i < len(m.buckets); i++ { 283 if m.buckets[i].dib > hdib { 284 hdib = m.buckets[i].dib 285 } 286 } 287 return hdib 288 } 289 290 // PercentFull returns the current load factor of the HashMapGP 291 func (m *HashMapGP) PercentFull() float64 { 292 return float64(m.keys) / float64(len(m.buckets)) 293 } 294 295 // Len returns the number of entries currently in the HashMapGP 296 func (m *HashMapGP) Len() int { 297 return int(m.keys) 298 } 299 300 // Close closes and frees the current hashmap. Calling any method 301 // on the HashMapGP after this will most likely result in a panic 302 func (m *HashMapGP) Close() { 303 destroyHashMapGP(m) 304 } 305 306 // destroy does exactly what is sounds like it does 307 func destroyHashMapGP(m *HashMapGP) { 308 m = nil 309 }