// Source: github.com/dolthub/swiss@v0.2.2-0.20240312182618-f4b2babd2bc1/map.go

// Copyright 2023 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package swiss

import (
	"github.com/dolthub/maphash"
)

// maxLoadFactor is maxAvgGroupLoad expressed as a fraction of groupSize
// (both constants are declared outside this part of the file).
const (
	maxLoadFactor = float32(maxAvgGroupLoad) / float32(groupSize)
)

// Map is an open-addressing hash map
// based on Abseil's flat_hash_map.
//
// Fields:
//   - ctrl:     per-group control bytes; ctrl[i] describes the slots of
//     groups[i], and both slices are always allocated with the same length.
//   - groups:   the key/value storage, one group per ctrl entry.
//   - hash:     produces the 64-bit key hash that splitHash divides into
//     an h1 prefix and an h2 suffix.
//   - resident: number of slots whose control byte is not empty, i.e.
//     live entries plus tombstones.
//   - dead:     number of tombstoned slots.
//   - limit:    once resident reaches this value, the next Put rehashes.
type Map[K comparable, V any] struct {
	ctrl     []metadata
	groups   []group[K, V]
	hash     maphash.Hasher[K]
	resident uint32
	dead     uint32
	limit    uint32
}

// metadata is the h2 metadata array for a group.
// find operations first probe the control bytes
// to filter candidates before matching keys.
// Each byte is either empty, tombstone, or the h2
// of the key stored in the corresponding slot.
type metadata [groupSize]int8

// group is a group of groupSize key-value pairs,
// stored as parallel key and value arrays.
// NOTE(review): this was originally documented as "16 key-value pairs";
// groupSize is declared elsewhere — confirm its value.
type group[K comparable, V any] struct {
	keys   [groupSize]K
	values [groupSize]V
}

// h1Mask and h2Mask select the upper 57 and lower 7 bits of a hash.
// empty and tombstone are the control-byte sentinels; both are negative,
// so they can never equal an h2 value, which is masked to 0..127.
const (
	h1Mask    uint64 = 0xffff_ffff_ffff_ff80
	h2Mask    uint64 = 0x0000_0000_0000_007f
	empty     int8   = -128 // 0b1000_0000
	tombstone int8   = -2   // 0b1111_1110
)

// h1 is a 57 bit hash prefix
type h1 uint64

// h2 is a 7 bit hash suffix
type h2 int8

// NewMap constructs a Map.
61 func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) { 62 groups := numGroups(sz) 63 m = &Map[K, V]{ 64 ctrl: make([]metadata, groups), 65 groups: make([]group[K, V], groups), 66 hash: maphash.NewHasher[K](), 67 limit: groups * maxAvgGroupLoad, 68 } 69 for i := range m.ctrl { 70 m.ctrl[i] = newEmptyMetadata() 71 } 72 return 73 } 74 75 // Has returns true if |key| is present in |m|. 76 func (m *Map[K, V]) Has(key K) (ok bool) { 77 hi, lo := splitHash(m.hash.Hash(key)) 78 g := probeStart(hi, len(m.groups)) 79 for { // inlined find loop 80 matches := metaMatchH2(&m.ctrl[g], lo) 81 for matches != 0 { 82 s := nextMatch(&matches) 83 if key == m.groups[g].keys[s] { 84 ok = true 85 return 86 } 87 } 88 // |key| is not in group |g|, 89 // stop probing if we see an empty slot 90 matches = metaMatchEmpty(&m.ctrl[g]) 91 if matches != 0 { 92 ok = false 93 return 94 } 95 g += 1 // linear probing 96 if g >= uint32(len(m.groups)) { 97 g = 0 98 } 99 } 100 } 101 102 // Get returns the |value| mapped by |key| if one exists. 
103 func (m *Map[K, V]) Get(key K) (value V, ok bool) { 104 hi, lo := splitHash(m.hash.Hash(key)) 105 g := probeStart(hi, len(m.groups)) 106 for { // inlined find loop 107 matches := metaMatchH2(&m.ctrl[g], lo) 108 for matches != 0 { 109 s := nextMatch(&matches) 110 if key == m.groups[g].keys[s] { 111 value, ok = m.groups[g].values[s], true 112 return 113 } 114 } 115 // |key| is not in group |g|, 116 // stop probing if we see an empty slot 117 matches = metaMatchEmpty(&m.ctrl[g]) 118 if matches != 0 { 119 ok = false 120 return 121 } 122 g += 1 // linear probing 123 if g >= uint32(len(m.groups)) { 124 g = 0 125 } 126 } 127 } 128 129 // Put attempts to insert |key| and |value| 130 func (m *Map[K, V]) Put(key K, value V) { 131 if m.resident >= m.limit { 132 m.rehash(m.nextSize()) 133 } 134 hi, lo := splitHash(m.hash.Hash(key)) 135 g := probeStart(hi, len(m.groups)) 136 for { // inlined find loop 137 matches := metaMatchH2(&m.ctrl[g], lo) 138 for matches != 0 { 139 s := nextMatch(&matches) 140 if key == m.groups[g].keys[s] { // update 141 m.groups[g].keys[s] = key 142 m.groups[g].values[s] = value 143 return 144 } 145 } 146 // |key| is not in group |g|, 147 // stop probing if we see an empty slot 148 matches = metaMatchEmpty(&m.ctrl[g]) 149 if matches != 0 { // insert 150 s := nextMatch(&matches) 151 m.groups[g].keys[s] = key 152 m.groups[g].values[s] = value 153 m.ctrl[g][s] = int8(lo) 154 m.resident++ 155 return 156 } 157 g += 1 // linear probing 158 if g >= uint32(len(m.groups)) { 159 g = 0 160 } 161 } 162 } 163 164 // Delete attempts to remove |key|, returns true successful. 
165 func (m *Map[K, V]) Delete(key K) (ok bool) { 166 hi, lo := splitHash(m.hash.Hash(key)) 167 g := probeStart(hi, len(m.groups)) 168 for { 169 matches := metaMatchH2(&m.ctrl[g], lo) 170 for matches != 0 { 171 s := nextMatch(&matches) 172 if key == m.groups[g].keys[s] { 173 ok = true 174 // optimization: if |m.ctrl[g]| contains any empty 175 // metadata bytes, we can physically delete |key| 176 // rather than placing a tombstone. 177 // The observation is that any probes into group |g| 178 // would already be terminated by the existing empty 179 // slot, and therefore reclaiming slot |s| will not 180 // cause premature termination of probes into |g|. 181 if metaMatchEmpty(&m.ctrl[g]) != 0 { 182 m.ctrl[g][s] = empty 183 m.resident-- 184 } else { 185 m.ctrl[g][s] = tombstone 186 m.dead++ 187 } 188 var k K 189 var v V 190 m.groups[g].keys[s] = k 191 m.groups[g].values[s] = v 192 return 193 } 194 } 195 // |key| is not in group |g|, 196 // stop probing if we see an empty slot 197 matches = metaMatchEmpty(&m.ctrl[g]) 198 if matches != 0 { // |key| absent 199 ok = false 200 return 201 } 202 g += 1 // linear probing 203 if g >= uint32(len(m.groups)) { 204 g = 0 205 } 206 } 207 } 208 209 // Iter iterates the elements of the Map, passing them to the callback. 210 // It guarantees that any key in the Map will be visited only once, and 211 // for un-mutated Maps, every key will be visited once. If the Map is 212 // Mutated during iteration, mutations will be reflected on return from 213 // Iter, but the set of keys visited by Iter is non-deterministic. 
214 func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) { 215 // take a consistent view of the table in case 216 // we rehash during iteration 217 ctrl, groups := m.ctrl, m.groups 218 // pick a random starting group 219 g := randIntN(len(groups)) 220 for n := 0; n < len(groups); n++ { 221 for s, c := range ctrl[g] { 222 if c == empty || c == tombstone { 223 continue 224 } 225 k, v := groups[g].keys[s], groups[g].values[s] 226 if stop := cb(k, v); stop { 227 return 228 } 229 } 230 g++ 231 if g >= uint32(len(groups)) { 232 g = 0 233 } 234 } 235 } 236 237 // Clear removes all elements from the Map. 238 func (m *Map[K, V]) Clear() { 239 for i, c := range m.ctrl { 240 for j := range c { 241 m.ctrl[i][j] = empty 242 } 243 } 244 var k K 245 var v V 246 for i := range m.groups { 247 g := &m.groups[i] 248 for i := range g.keys { 249 g.keys[i] = k 250 g.values[i] = v 251 } 252 } 253 m.resident, m.dead = 0, 0 254 } 255 256 // Count returns the number of elements in the Map. 257 func (m *Map[K, V]) Count() int { 258 return int(m.resident - m.dead) 259 } 260 261 // Capacity returns the number of additional elements 262 // the can be added to the Map before resizing. 263 func (m *Map[K, V]) Capacity() int { 264 return int(m.limit - m.resident) 265 } 266 267 // find returns the location of |key| if present, or its insertion location if absent. 268 // for performance, find is manually inlined into public methods. 
269 func (m *Map[K, V]) find(key K, hi h1, lo h2) (g, s uint32, ok bool) { 270 g = probeStart(hi, len(m.groups)) 271 for { 272 matches := metaMatchH2(&m.ctrl[g], lo) 273 for matches != 0 { 274 s = nextMatch(&matches) 275 if key == m.groups[g].keys[s] { 276 return g, s, true 277 } 278 } 279 // |key| is not in group |g|, 280 // stop probing if we see an empty slot 281 matches = metaMatchEmpty(&m.ctrl[g]) 282 if matches != 0 { 283 s = nextMatch(&matches) 284 return g, s, false 285 } 286 g += 1 // linear probing 287 if g >= uint32(len(m.groups)) { 288 g = 0 289 } 290 } 291 } 292 293 func (m *Map[K, V]) nextSize() (n uint32) { 294 n = uint32(len(m.groups)) * 2 295 if m.dead >= (m.resident / 2) { 296 n = uint32(len(m.groups)) 297 } 298 return 299 } 300 301 func (m *Map[K, V]) rehash(n uint32) { 302 groups, ctrl := m.groups, m.ctrl 303 m.groups = make([]group[K, V], n) 304 m.ctrl = make([]metadata, n) 305 for i := range m.ctrl { 306 m.ctrl[i] = newEmptyMetadata() 307 } 308 m.hash = maphash.NewSeed(m.hash) 309 m.limit = n * maxAvgGroupLoad 310 m.resident, m.dead = 0, 0 311 for g := range ctrl { 312 for s := range ctrl[g] { 313 c := ctrl[g][s] 314 if c == empty || c == tombstone { 315 continue 316 } 317 m.Put(groups[g].keys[s], groups[g].values[s]) 318 } 319 } 320 } 321 322 func (m *Map[K, V]) loadFactor() float32 { 323 slots := float32(len(m.groups) * groupSize) 324 return float32(m.resident-m.dead) / slots 325 } 326 327 // numGroups returns the minimum number of groups needed to store |n| elems. 
328 func numGroups(n uint32) (groups uint32) { 329 groups = (n + maxAvgGroupLoad - 1) / maxAvgGroupLoad 330 if groups == 0 { 331 groups = 1 332 } 333 return 334 } 335 336 func newEmptyMetadata() (meta metadata) { 337 for i := range meta { 338 meta[i] = empty 339 } 340 return 341 } 342 343 func splitHash(h uint64) (h1, h2) { 344 return h1((h & h1Mask) >> 7), h2(h & h2Mask) 345 } 346 347 func probeStart(hi h1, groups int) uint32 { 348 return fastModN(uint32(hi), uint32(groups)) 349 } 350 351 // lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ 352 func fastModN(x, n uint32) uint32 { 353 return uint32((uint64(x) * uint64(n)) >> 32) 354 }