github.com/mhmtszr/concurrent-swiss-map@v1.0.8/swiss/map.go (about) 1 // Copyright 2023 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package swiss 16 17 import ( 18 "github.com/mhmtszr/concurrent-swiss-map/maphash" 19 ) 20 21 const ( 22 maxLoadFactor = float32(maxAvgGroupLoad) / float32(groupSize) 23 ) 24 25 // Map is an open-addressing hash map 26 // based on Abseil's flat_hash_map. 27 type Map[K comparable, V any] struct { 28 ctrl []metadata 29 groups []group[K, V] 30 hash maphash.Hasher[K] 31 resident uint32 32 dead uint32 33 limit uint32 34 } 35 36 // metadata is the h2 metadata array for a group. 37 // find operations first probe the controls bytes 38 // to filter candidates before matching keys 39 type metadata [groupSize]int8 40 41 // group is a group of 16 key-value pairs 42 type group[K comparable, V any] struct { 43 keys [groupSize]K 44 values [groupSize]V 45 } 46 47 const ( 48 h1Mask uint64 = 0xffff_ffff_ffff_ff80 49 h2Mask uint64 = 0x0000_0000_0000_007f 50 empty int8 = -128 // 0b1000_0000 51 tombstone int8 = -2 // 0b1111_1110 52 ) 53 54 // h1 is a 57 bit hash prefix 55 type h1 uint64 56 57 // h2 is a 7 bit hash suffix 58 type h2 int8 59 60 // NewMap constructs a Map. 61 func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) { 62 groups := numGroups(sz) 63 m = &Map[K, V]{ 64 ctrl: make([]metadata, groups), 65 groups: make([]group[K, V], groups), 66 hash: maphash.NewHasher[K](), 67 limit: groups * maxAvgGroupLoad, 68 } 69 for i := range m.ctrl { 70 m.ctrl[i] = newEmptyMetadata() 71 } 72 return 73 } 74 75 func (m *Map[K, V]) HasWithHash(key K, hash uint64) (ok bool) { 76 hi, lo := splitHash(hash) 77 g := probeStart(hi, len(m.groups)) 78 for { // inlined find loop 79 matches := metaMatchH2(&m.ctrl[g], lo) 80 for matches != 0 { 81 s := nextMatch(&matches) 82 if key == m.groups[g].keys[s] { 83 ok = true 84 return 85 } 86 } 87 // |key| is not in group |g|, 88 // stop probing if we see an empty slot 89 matches = metaMatchEmpty(&m.ctrl[g]) 90 if matches != 0 { 91 ok = false 92 return 93 } 94 g++ // linear probing 95 if g >= uint32(len(m.groups)) { 96 g = 0 97 } 98 } 99 } 100 101 func (m *Map[K, V]) GetWithHash(key K, hash uint64) (value V, ok bool) { 102 hi, lo := splitHash(hash) 103 g := probeStart(hi, len(m.groups)) 104 for { // inlined find loop 105 matches := metaMatchH2(&m.ctrl[g], lo) 106 for matches != 0 { 107 s := nextMatch(&matches) 108 if key == m.groups[g].keys[s] { 109 value, ok = m.groups[g].values[s], true 110 return 111 } 112 } 113 // |key| is not in group |g|, 114 // stop probing if we see an empty slot 115 matches = metaMatchEmpty(&m.ctrl[g]) 116 if matches != 0 { 117 ok = false 118 return 119 } 120 g++ // linear probing 121 if g >= uint32(len(m.groups)) { 122 g = 0 123 } 124 } 125 } 126 127 // Put attempts to insert |key| and |value| 128 func (m *Map[K, V]) Put(key K, value V) { 129 if m.resident >= m.limit { 130 m.rehash(m.nextSize()) 131 } 132 hi, lo := splitHash(m.hash.Hash(key)) 133 g := probeStart(hi, len(m.groups)) 134 for { // inlined find loop 135 matches := metaMatchH2(&m.ctrl[g], lo) 136 for matches != 0 { 137 s := nextMatch(&matches) 138 if key == m.groups[g].keys[s] { // update 139 m.groups[g].keys[s] = key 140 m.groups[g].values[s] = value 141 return 142 } 143 } 144 // |key| is not in group |g|, 145 // stop probing if we see an empty slot 146 matches = metaMatchEmpty(&m.ctrl[g]) 147 if matches != 0 { // insert 148 s := nextMatch(&matches) 149 m.groups[g].keys[s] = key 150 m.groups[g].values[s] = value 151 m.ctrl[g][s] = int8(lo) 152 m.resident++ 153 return 154 } 155 g++ // linear probing 156 if g >= uint32(len(m.groups)) { 157 g = 0 158 } 159 } 160 } 161 162 // Put attempts to insert |key| and |value| 163 func (m *Map[K, V]) PutWithHash(key K, value V, hash uint64) { 164 if m.resident >= m.limit { 165 m.rehash(m.nextSize()) 166 } 167 hi, lo := splitHash(hash) 168 g := probeStart(hi, len(m.groups)) 169 for { // inlined find loop 170 matches := metaMatchH2(&m.ctrl[g], lo) 171 for matches != 0 { 172 s := nextMatch(&matches) 173 if key == m.groups[g].keys[s] { // update 174 m.groups[g].keys[s] = key 175 m.groups[g].values[s] = value 176 return 177 } 178 } 179 // |key| is not in group |g|, 180 // stop probing if we see an empty slot 181 matches = metaMatchEmpty(&m.ctrl[g]) 182 if matches != 0 { // insert 183 s := nextMatch(&matches) 184 m.groups[g].keys[s] = key 185 m.groups[g].values[s] = value 186 m.ctrl[g][s] = int8(lo) 187 m.resident++ 188 return 189 } 190 g++ // linear probing 191 if g >= uint32(len(m.groups)) { 192 g = 0 193 } 194 } 195 } 196 197 func (m *Map[K, V]) DeleteWithHash(key K, hash uint64) (ok bool) { 198 hi, lo := splitHash(hash) 199 g := probeStart(hi, len(m.groups)) 200 for { 201 matches := metaMatchH2(&m.ctrl[g], lo) 202 for matches != 0 { 203 s := nextMatch(&matches) 204 if key == m.groups[g].keys[s] { 205 ok = true 206 // optimization: if |m.ctrl[g]| contains any empty 207 // metadata bytes, we can physically delete |key| 208 // rather than placing a tombstone. 209 // The observation is that any probes into group |g| 210 // would already be terminated by the existing empty 211 // slot, and therefore reclaiming slot |s| will not 212 // cause premature termination of probes into |g|. 213 if metaMatchEmpty(&m.ctrl[g]) != 0 { 214 m.ctrl[g][s] = empty 215 m.resident-- 216 } else { 217 m.ctrl[g][s] = tombstone 218 m.dead++ 219 } 220 var k K 221 var v V 222 m.groups[g].keys[s] = k 223 m.groups[g].values[s] = v 224 return 225 } 226 } 227 // |key| is not in group |g|, 228 // stop probing if we see an empty slot 229 matches = metaMatchEmpty(&m.ctrl[g]) 230 if matches != 0 { // |key| absent 231 ok = false 232 return 233 } 234 g++ // linear probing 235 if g >= uint32(len(m.groups)) { 236 g = 0 237 } 238 } 239 } 240 241 // Clear removes all elements from the Map. 242 func (m *Map[K, V]) Clear() { 243 for i, c := range m.ctrl { 244 for j := range c { 245 m.ctrl[i][j] = empty 246 } 247 } 248 var k K 249 var v V 250 for i := range m.groups { 251 g := &m.groups[i] 252 for i := range g.keys { 253 g.keys[i] = k 254 g.values[i] = v 255 } 256 } 257 m.resident, m.dead = 0, 0 258 } 259 260 // Iter iterates the elements of the Map, passing them to the callback. 261 // It guarantees that any key in the Map will be visited only once, and 262 // for un-mutated Maps, every key will be visited once. If the Map is 263 // Mutated during iteration, mutations will be reflected on return from 264 // Iter, but the set of keys visited by Iter is non-deterministic. 265 // 266 //nolint:gosec 267 func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) bool { 268 // take a consistent view of the table in case 269 // we rehash during iteration 270 ctrl, groups := m.ctrl, m.groups 271 // pick a random starting group 272 g := randIntN(len(groups)) 273 for n := 0; n < len(groups); n++ { 274 for s, c := range ctrl[g] { 275 if c == empty || c == tombstone { 276 continue 277 } 278 k, v := groups[g].keys[s], groups[g].values[s] 279 if stop := cb(k, v); stop { 280 return stop 281 } 282 } 283 g++ 284 if g >= uint32(len(groups)) { 285 g = 0 286 } 287 } 288 return false 289 } 290 291 // Count returns the number of elements in the Map. 292 func (m *Map[K, V]) Count() int { 293 return int(m.resident - m.dead) 294 } 295 296 func (m *Map[K, V]) nextSize() (n uint32) { 297 n = uint32(len(m.groups)) * 2 298 if m.dead >= (m.resident / 2) { 299 n = uint32(len(m.groups)) 300 } 301 return 302 } 303 304 func (m *Map[K, V]) rehash(n uint32) { 305 groups, ctrl := m.groups, m.ctrl 306 m.groups = make([]group[K, V], n) 307 m.ctrl = make([]metadata, n) 308 for i := range m.ctrl { 309 m.ctrl[i] = newEmptyMetadata() 310 } 311 m.hash = maphash.NewSeed(m.hash) 312 m.limit = n * maxAvgGroupLoad 313 m.resident, m.dead = 0, 0 314 for g := range ctrl { 315 for s := range ctrl[g] { 316 c := ctrl[g][s] 317 if c == empty || c == tombstone { 318 continue 319 } 320 m.Put(groups[g].keys[s], groups[g].values[s]) 321 } 322 } 323 } 324 325 // numGroups returns the minimum number of groups needed to store |n| elems. 326 func numGroups(n uint32) (groups uint32) { 327 groups = (n + maxAvgGroupLoad - 1) / maxAvgGroupLoad 328 if groups == 0 { 329 groups = 1 330 } 331 return 332 } 333 334 func newEmptyMetadata() (meta metadata) { 335 for i := range meta { 336 meta[i] = empty 337 } 338 return 339 } 340 341 func splitHash(h uint64) (h1, h2) { 342 return h1((h & h1Mask) >> 7), h2(h & h2Mask) 343 } 344 345 func probeStart(hi h1, groups int) uint32 { 346 return fastModN(uint32(hi), uint32(groups)) 347 } 348 349 // lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ 350 func fastModN(x, n uint32) uint32 { 351 return uint32((uint64(x) * uint64(n)) >> 32) 352 } 353 354 // randIntN returns a random number in the interval [0, n). 355 func randIntN(n int) uint32 { 356 return fastModN(fastrand(), uint32(n)) 357 }