github.com/saintwish/kv@v1.0.4/swiss/map.go (about) 1 // From https://github.com/dolthub/swiss 2 // Copyright 2023 Dolthub, Inc. 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package swiss 17 18 import ( 19 "github.com/dolthub/maphash" 20 ) 21 22 const ( 23 maxLoadFactor = float32(maxAvgGroupLoad) / float32(groupSize) 24 ) 25 26 // Map is an open-addressing hash map 27 // based on Abseil's flat_hash_map. 28 type Map[K comparable, V any] struct { 29 ctrl []metadata 30 groups []group[K, V] 31 hash maphash.Hasher[K] 32 resident uint32 33 dead uint32 34 limit uint32 35 } 36 37 // metadata is the h2 metadata array for a group. 38 // find operations first probe the controls bytes 39 // to filter candidates before matching keys 40 type metadata [groupSize]int8 41 42 // group is a group of 16 key-value pairs 43 type group[K comparable, V any] struct { 44 keys [groupSize]K 45 values [groupSize]V 46 } 47 48 const ( 49 h1Mask uint64 = 0xffff_ffff_ffff_ff80 50 h2Mask uint64 = 0x0000_0000_0000_007f 51 empty int8 = -128 // 0b1000_0000 52 tombstone int8 = -2 // 0b1111_1110 53 ) 54 55 // h1 is a 57 bit hash prefix 56 type h1 uint64 57 58 // h2 is a 7 bit hash suffix 59 type h2 int8 60 61 // NewMap constructs a Map. 62 func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) { 63 groups := numGroups(sz) 64 m = &Map[K, V]{ 65 ctrl: make([]metadata, groups), 66 groups: make([]group[K, V], groups), 67 hash: maphash.NewHasher[K](), 68 limit: groups * maxAvgGroupLoad, 69 } 70 for i := range m.ctrl { 71 m.ctrl[i] = newEmptyMetadata() 72 } 73 return 74 } 75 76 // Has returns true if |key| is present in |m|. 77 func (m *Map[K, V]) Has(key K) (ok bool) { 78 hi, lo := splitHash(m.hash.Hash(key)) 79 g := probeStart(hi, len(m.groups)) 80 for { // inlined find loop 81 matches := metaMatchH2(&m.ctrl[g], lo) 82 for matches != 0 { 83 s := nextMatch(&matches) 84 if key == m.groups[g].keys[s] { 85 ok = true 86 return 87 } 88 } 89 // |key| is not in group |g|, 90 // stop probing if we see an empty slot 91 matches = metaMatchEmpty(&m.ctrl[g]) 92 if matches != 0 { 93 ok = false 94 return 95 } 96 g += 1 // linear probing 97 if g >= uint32(len(m.groups)) { 98 g = 0 99 } 100 } 101 } 102 103 // Get returns the |value|. 104 func (m *Map[K, V]) Get(key K) (value V) { 105 hi, lo := splitHash(m.hash.Hash(key)) 106 g := probeStart(hi, len(m.groups)) 107 for { // inlined find loop 108 matches := metaMatchH2(&m.ctrl[g], lo) 109 for matches != 0 { 110 s := nextMatch(&matches) 111 if key == m.groups[g].keys[s] { 112 value, _ = m.groups[g].values[s], true 113 return 114 } 115 } 116 // |key| is not in group |g|, 117 // stop probing if we see an empty slot 118 matches = metaMatchEmpty(&m.ctrl[g]) 119 if matches != 0 { 120 return 121 } 122 g += 1 // linear probing 123 if g >= uint32(len(m.groups)) { 124 g = 0 125 } 126 } 127 } 128 129 // GetHas returns the |value| and |ok| mapped by |key|. 130 func (m *Map[K, V]) GetHas(key K) (ok bool, value V) { 131 hi, lo := splitHash(m.hash.Hash(key)) 132 g := probeStart(hi, len(m.groups)) 133 for { // inlined find loop 134 matches := metaMatchH2(&m.ctrl[g], lo) 135 for matches != 0 { 136 s := nextMatch(&matches) 137 if key == m.groups[g].keys[s] { 138 value, ok = m.groups[g].values[s], true 139 return 140 } 141 } 142 // |key| is not in group |g|, 143 // stop probing if we see an empty slot 144 matches = metaMatchEmpty(&m.ctrl[g]) 145 if matches != 0 { 146 ok = false 147 return 148 } 149 g += 1 // linear probing 150 if g >= uint32(len(m.groups)) { 151 g = 0 152 } 153 } 154 } 155 156 // Put attempts to insert |key| and |value| 157 func (m *Map[K, V]) Set(key K, value V) { 158 if m.resident >= m.limit { 159 m.rehash(m.nextSize()) 160 } 161 hi, lo := splitHash(m.hash.Hash(key)) 162 g := probeStart(hi, len(m.groups)) 163 for { // inlined find loop 164 matches := metaMatchH2(&m.ctrl[g], lo) 165 for matches != 0 { 166 s := nextMatch(&matches) 167 if key == m.groups[g].keys[s] { // update 168 m.groups[g].keys[s] = key 169 m.groups[g].values[s] = value 170 return 171 } 172 } 173 // |key| is not in group |g|, 174 // stop probing if we see an empty slot 175 matches = metaMatchEmpty(&m.ctrl[g]) 176 if matches != 0 { // insert 177 s := nextMatch(&matches) 178 m.groups[g].keys[s] = key 179 m.groups[g].values[s] = value 180 m.ctrl[g][s] = int8(lo) 181 m.resident++ 182 return 183 } 184 g += 1 // linear probing 185 if g >= uint32(len(m.groups)) { 186 g = 0 187 } 188 } 189 } 190 191 // Delete attempts to remove |key|, returns true if successful and the item. 192 func (m *Map[K, V]) Delete(key K) (ok bool, value V) { 193 hi, lo := splitHash(m.hash.Hash(key)) 194 g := probeStart(hi, len(m.groups)) 195 for { 196 matches := metaMatchH2(&m.ctrl[g], lo) 197 for matches != 0 { 198 s := nextMatch(&matches) 199 if key == m.groups[g].keys[s] { 200 ok, value = true, m.groups[g].values[s] 201 // optimization: if |m.ctrl[g]| contains any empty 202 // metadata bytes, we can physically delete |key| 203 // rather than placing a tombstone. 204 // The observation is that any probes into group |g| 205 // would already be terminated by the existing empty 206 // slot, and therefore reclaiming slot |s| will not 207 // cause premature termination of probes into |g|. 208 if metaMatchEmpty(&m.ctrl[g]) != 0 { 209 m.ctrl[g][s] = empty 210 m.resident-- 211 } else { 212 m.ctrl[g][s] = tombstone 213 m.dead++ 214 } 215 var k K 216 var v V 217 m.groups[g].keys[s] = k 218 m.groups[g].values[s] = v 219 return 220 } 221 } 222 // |key| is not in group |g|, 223 // stop probing if we see an empty slot 224 matches = metaMatchEmpty(&m.ctrl[g]) 225 if matches != 0 { // |key| absent 226 var v V 227 ok, value = false, v 228 return 229 } 230 g += 1 // linear probing 231 if g >= uint32(len(m.groups)) { 232 g = 0 233 } 234 } 235 } 236 237 // Iter iterates the elements of the Map, passing them to the callback. 238 // It guarantees that any key in the Map will be visited only once, and 239 // for un-mutated Maps, every key will be visited once. If the Map is 240 // Mutated during iteration, mutations will be reflected on return from 241 // Iter, but the set of keys visited by Iter is non-deterministic. 242 func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) { 243 // take a consistent view of the table in case 244 // we rehash during iteration 245 ctrl, groups := m.ctrl, m.groups 246 // pick a random starting group 247 g := randIntN(len(groups)) 248 for n := 0; n < len(groups); n++ { 249 for s, c := range ctrl[g] { 250 if c == empty || c == tombstone { 251 continue 252 } 253 k, v := groups[g].keys[s], groups[g].values[s] 254 if stop := cb(k, v); stop { 255 return 256 } 257 } 258 g++ 259 if g >= uint32(len(groups)) { 260 g = 0 261 } 262 } 263 } 264 265 // Clear removes all elements from the Map. 266 func (m *Map[K, V]) Clear() { 267 for i, c := range m.ctrl { 268 for j := range c { 269 m.ctrl[i][j] = empty 270 } 271 } 272 var k K 273 var v V 274 for i := range m.groups { 275 g := &m.groups[i] 276 for i := range g.keys { 277 g.keys[i] = k 278 g.values[i] = v 279 } 280 } 281 m.resident, m.dead = 0, 0 282 } 283 284 // Count returns the number of elements in the Map. 285 func (m *Map[K, V]) Count() int { 286 return int(m.resident - m.dead) 287 } 288 289 // Capacity returns the number of additional elements 290 // the can be added to the Map before resizing. 291 func (m *Map[K, V]) Capacity() int { 292 return int(m.limit - m.resident) 293 } 294 295 // MaxCapacity returns max number elements 296 // that can be added to the Map before resizing. 297 func (m *Map[K, V]) MaxCapacity() int { 298 return int(m.limit) 299 } 300 301 // find returns the location of |key| if present, or its insertion location if absent. 302 // for performance, find is manually inlined into public methods. 303 func (m *Map[K, V]) find(key K, hi h1, lo h2) (g, s uint32, ok bool) { 304 g = probeStart(hi, len(m.groups)) 305 for { 306 matches := metaMatchH2(&m.ctrl[g], lo) 307 for matches != 0 { 308 s = nextMatch(&matches) 309 if key == m.groups[g].keys[s] { 310 return g, s, true 311 } 312 } 313 // |key| is not in group |g|, 314 // stop probing if we see an empty slot 315 matches = metaMatchEmpty(&m.ctrl[g]) 316 if matches != 0 { 317 s = nextMatch(&matches) 318 return g, s, false 319 } 320 g += 1 // linear probing 321 if g >= uint32(len(m.groups)) { 322 g = 0 323 } 324 } 325 } 326 327 func (m *Map[K, V]) nextSize() (n uint32) { 328 n = uint32(len(m.groups)) * 2 329 if m.dead >= (m.resident / 2) { 330 n = uint32(len(m.groups)) 331 } 332 return 333 } 334 335 func (m *Map[K, V]) rehash(n uint32) { 336 groups, ctrl := m.groups, m.ctrl 337 m.groups = make([]group[K, V], n) 338 m.ctrl = make([]metadata, n) 339 for i := range m.ctrl { 340 m.ctrl[i] = newEmptyMetadata() 341 } 342 m.hash = maphash.NewSeed(m.hash) 343 m.limit = n * maxAvgGroupLoad 344 m.resident, m.dead = 0, 0 345 for g := range ctrl { 346 for s := range ctrl[g] { 347 c := ctrl[g][s] 348 if c == empty || c == tombstone { 349 continue 350 } 351 m.Set(groups[g].keys[s], groups[g].values[s]) 352 } 353 } 354 } 355 356 func (m *Map[K, V]) loadFactor() float32 { 357 slots := float32(len(m.groups) * groupSize) 358 return float32(m.resident-m.dead) / slots 359 } 360 361 // numGroups returns the minimum number of groups needed to store |n| elems. 362 func numGroups(n uint32) (groups uint32) { 363 groups = (n + maxAvgGroupLoad - 1) / maxAvgGroupLoad 364 if groups == 0 { 365 groups = 1 366 } 367 return 368 } 369 370 func newEmptyMetadata() (meta metadata) { 371 for i := range meta { 372 meta[i] = empty 373 } 374 return 375 } 376 377 func splitHash(h uint64) (h1, h2) { 378 return h1((h & h1Mask) >> 7), h2(h & h2Mask) 379 } 380 381 func probeStart(hi h1, groups int) uint32 { 382 return fastModN(uint32(hi), uint32(groups)) 383 } 384 385 // lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ 386 func fastModN(x, n uint32) uint32 { 387 return uint32((uint64(x) * uint64(n)) >> 32) 388 } 389 390 // randIntN returns a random number in the interval [0, n). 391 func randIntN(n int) uint32 { 392 return fastModN(fastrand(), uint32(n)) 393 }