github.com/thepudds/swisstable@v0.0.0-20221011152303-9c77dc657777/map.go

package swisstable

import (
	"fmt"
	"math/bits"
	"runtime"
	"unsafe"
)

// Basic terminology:
// map: overall data structure, internally organized into groups.
// group: a set of 16 contiguous positions that can be examined in parallel.
// position: index within the overall linear table. Each position has a slot & control byte.
// slot: stores one key/value.
// control byte: metadata about a particular slot, including whether empty, deleted, or has a stored value.
// offset: index within a group.
// H1: hash(key) % group count. Corresponds to the natural (non-displaced) group for a given key.
// H2: 7 additional bits from hash(key). Stored in the control byte.
// count: number of live key/values. Returned via Len.
// table size: len(slots).
//
// Individual positions can be EMPTY, DELETED, or STORED (containing a key/value).
//
// In addition, internally there is a fixedTable type that is a non-resizable Swisstable.
// Map manages a current fixedTable, and when doing incremental growth, an old fixedTable.
// During write operations to Map (Set/Delete), the old fixedTable is gradually
// evacuated to the current fixedTable.
//
// Incremental growth without invalidating iterators presents some challenges, in part
// because a Swisstable can mark control bytes as EMPTY or DELETED to ensure probing chains
// across groups are correctly followed to find any displaced elements. This must be
// properly navigated when juggling an old and new table.
//
// The basic approach is to maintain an immutable old once growth starts, along with
// some growth status bytes that are live for the duration of the growth, with one
// byte per group. (This can be collapsed down to fewer bits, but we use a full byte for now).
// Even with the extra growth status bytes, this still uses less memory than the runtime map,
// which allocates extra overflow buckets that exceed the size of the growth status bytes
// even for small key/values.
//
// If an iterator starts mid-growth, it walks both the old and new table, taking care
// not to emit the same key twice. If growth completes, the iterator continues to walk
// the old and new tables it started with. In both cases, it checks the live tables if needed to
// get the live golden data. It attempts to avoid re-hashing in some cases by reconstructing
// the hash from the group and the 7 bits of stored h2. See the Range method for details.
// (I think it re-hashes less than the runtime map iterator. TODO: confirm).

// Key, Value, and KV define our key and value types.
// TODO: these are placeholder types for performance testing prior to using generics.
type Key int64
type Value int64
type KV struct {
	Key   Key
	Value Value
}

type hashFunc func(k Key, seed uintptr) uint64
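
// The sketch below is illustrative only (exampleUsage is a hypothetical helper,
// not part of the API): it shows how the exported New, Set, Get, Delete, Range,
// and Len defined in this file are intended to be used together.
func exampleUsage() {
	m := New(100) // capacity hint, treated as "at least"
	for i := Key(0); i < 10; i++ {
		m.Set(i, Value(i*i))
	}
	if v, ok := m.Get(3); ok {
		fmt.Println("key 3 has value", v)
	}
	m.Delete(3)
	m.Range(func(k Key, v Value) bool {
		fmt.Println(k, "->", v)
		return true // returning false would stop the iteration
	})
	fmt.Println("len:", m.Len())
}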

// Control byte special values.
// If the high bit is 1, it is a special sentinel value of EMPTY or DELETED.
// If the high bit is 0, there is a STORED entry in the corresponding
// slot in the table, and the next 7 bits are the h2 value. (This is called 'FULL'
// in the original C++ swisstable implementation, but we call it STORED).
// TODO: consider flipping meaning of first bit, possibly with 0x00 for empty and 0x7F for deleted?
const emptySentinel = 0b1111_1111
const deletedSentinel = 0b1000_0000

// Map is a map, supporting Set, Get, Delete, Range and Len.
// It is implemented via a modified Swisstable.
// Unlike the original C++ Swisstable implementation,
// Map supports incremental resizing without invalidating iterators.
type Map struct {
	// Internally, a Map manages one or two fixedTables to store key/values. Normally,
	// it manages one fixedTable. While growing, it manages two fixedTables.

	// current is a fixedTable containing the element array and metadata for the active fixedTable.
	// Write operations (Set/Delete) on Map go to current.
	current fixedTable

	// old is only used during incremental growth.
	// When growth starts, we move current to old, and no longer write or delete key/values in old,
	// but instead gradually evacuate old to new on write operations (Set/Delete).
	// Get and Range handle finding the correct "golden" data in either current or old.
	old *fixedTable

	// growStatus tracks what has happened on a group-by-group basis.
	// To slightly simplify, currently each group gets a byte. TODO: could collapse that down to a few bits.
	growStatus []byte

	// sweepCursor tracks how far the incremental sweep of old has progressed, in groups.
	sweepCursor uint64

	// elemCount tracks the live count of key/values, and is returned by Len.
	elemCount int

	// When resizeThreshold is passed, we need to resize.
	// TODO: need to track DELETED count as well for resizing or compacting
	resizeThreshold int

	// currently for testing, we purposefully fill beyond the resizeThreshold.
	// TODO: remove
	disableResizing bool

	// Our hash function, which generates a 64-bit hash.
	hashFunc hashFunc
	seed     uintptr

	// Flags tracking state.
	// TODO: collapse down to single flag variable
	// TODO: could use these flags to indicate OK to clear during evac
	// haveIter bool
	// haveOldIter bool

	// Internal stats to help observe behavior.
	// TODO: eventually remove stats, not actively tracking some
	gets                int
	getH2FalsePositives int
	getExtraGroups      int
	resizeGenerations   int
}

// New returns a *Map that is ready to use.
// capacity is a hint, and is treated as "at least".
func New(capacity int) *Map {
	// tableSize will be roughly 1/0.8 x the user suggested capacity,
	// rounded up to a power of 2.
	// TODO: for now, should probably make capacity be at least 16 (group size)
	// to temporarily simplify handling small maps (where small here is < 16).
	tableSize := calcTableSize(capacity)

	current := *newFixedTable(tableSize)

	// TODO: for now, use same fill factor as the runtime map to
	// make it easier to compare performance across different sizes.
	resizeThreshold := (tableSize * 13) / 16 // TODO: centralize
	return &Map{
		current:         current,
		hashFunc:        hashUint64,
		seed:            uintptr(fastrand())<<32 | uintptr(fastrand()),
		resizeThreshold: resizeThreshold,
	}
}

// fixedTable does not support resizing.
type fixedTable struct {
	control []byte
	slots   []KV
	// groupCount int // TODO: consider using this, but maybe instead compare groupMask?
	groupMask uint64
	h2Shift   uint8

	// track our count of deletes, which we use when determining when to resize
	// TODO: dropping deletes without resize, or same size grow
	// if zero, we can skip some logic in some operations
	// TODO: check if that is a perf win
	deleteCount int
}
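
// The two helpers below are an illustrative sketch only (hypothetical names, not
// used elsewhere in this file) of the indexing arithmetic described in the
// terminology comment at the top of the file: a fixedTable is a sequence of
// 16-wide groups, the natural group for a hash is the hash masked by groupMask,
// and a position into control/slots is group*16 + offset.
func exampleNaturalGroup(t *fixedTable, h uint64) uint64 {
	return h & t.groupMask // H1: the natural (non-displaced) group for this hash
}

func examplePosition(group uint64, offset int) int {
	return int(group)*16 + offset // index into both t.control and t.slots
}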

// TODO: pick a key/value layout. Within the slots, our current layout is KV|KV|KV|KV|..., vs.
// the runtime's layout, which uses unsafe to access K|K|K|K|...|V|V|V|V|... per 8-elem bucket. That is more compact
// if K & V are not aligned, but equally compact if they are aligned.
// If we ignore alignment, our current layout might have better cache behavior,
// given high confidence that loading a key (for example, during a lookup) means you are about
// to access the adjacent value (which for typical key sizes would be in the same or an adjacent cache line).
// The Folly F14 layout, though, is probably better overall than the runtime layout or our current layout.
// (F14FastMap picks between values inline vs. values packed in a contiguous array based on entry size:
// https://github.com/facebook/folly/blob/main/folly/container/F14.md#f14-variants )
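
// Hypothetical sketch (not used by this implementation) of the split layout
// alternative mentioned in the comment above, roughly in the spirit of the
// runtime's bucket layout: keys and values stored in separate per-group arrays
// rather than as adjacent KV pairs.
type exampleSplitGroup struct {
	control [16]byte
	keys    [16]Key
	values  [16]Value
}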

// Get returns the value stored for k, and reports whether k was present.
func (m *Map) Get(k Key) (v Value, ok bool) {
	h := m.hashFunc(k, m.seed)

	if m.old == nil || isChainEvacuated(m.growStatus[h&m.old.groupMask]) {
		// We are either not growing, which is the simple case, and we
		// can just look in m.current, or we are growing but we have
		// recorded that any keys with the natural group of this key
		// have already been moved to m.current, which also means we
		// can just look in m.current.
		kv, _, _ := m.find(&m.current, k, h)
		if kv != nil {
			return kv.Value, true
		}
		return zeroValue(), false
	}

	// We are growing.
	// TODO: maybe extract to findGrowing or similar. Would be nice to do midstack inlining for common case.
	oldNatGroup := h & m.old.groupMask
	oldNatGroupEvac := isEvacuated(m.growStatus[oldNatGroup])
	table := &m.current
	if !oldNatGroupEvac {
		// The key has never been written/deleted in current since this grow started
		// (because we always move the natural group when writing/deleting a key while growing).
		table = m.old
	}
	kv, _, _ := m.find(table, k, h)
	if kv != nil {
		// Hit
		return kv.Value, true
	}
	if !oldNatGroupEvac {
		// Miss in old, and the key has never been written/deleted in current since grow started,
		// so this is a miss for the overall map.
		return zeroValue(), false
	}

	// We had a miss in current, and the old natural group was evacuated,
	// but it is not yet conclusive if we have an overall miss. For example,
	// perhaps a displaced key in old was moved to current and later deleted, or
	// perhaps a displaced key was never moved to current and the golden copy is still in old.
	// Side note: for any mid-growth map, the majority of groups are one of (a) not yet evacuated, or
	// (b) evacuated and this Get is for a non-displaced key (because most keys are not displaced),
	// so the work we did above handled that majority of groups.
	// Now we do more work for less common cases.

	oldKv, oldDisplGroup, _ := m.find(m.old, k, h)
	if oldNatGroup == oldDisplGroup {
		// We already know from above that this group was evacuated,
		// which means if there was a prior matching key in this group,
		// it would have been evacuated to current.
		// Given it is not in current now, this is a miss for the overall map.
		return zeroValue(), false
	}
	if oldKv != nil && !isEvacuated(m.growStatus[oldDisplGroup]) {
		// Hit for the overall map. This is a group with a displaced matching key, and
		// we've never written/deleted this key since the grow started,
		// so the golden copy is in old.
		// (This is an example of us currently relying on always evacuating the displaced key
		// on write/delete).
		// TODO: no non-fuzzing test hits this. might require longer probe chain. the fuzzing might hit.
		return oldKv.Value, true
	}
	// Miss. The displaced group was evacuated to current, but current doesn't have the key.
	return zeroValue(), false
}

// find searches the fixedTable for a key.
// For a hit, group is the location of the key, and offset is the location within the group.
// For a miss, group is the last probed group.
func (m *Map) find(t *fixedTable, k Key, h uint64) (kv *KV, group uint64, offset int) {
	// TODO: likely giving up some performance by sharing find between Get and Delete
	group = h & t.groupMask
	h2 := t.h2(h)

	// TODO: could try hints to elim some bounds check below with additional masking? maybe:
	// controlLenMask := len(m.current.control) - 1
	// slotsLenMask := len(m.current.slots) - 1

	var probeCount uint64

	// Do quadratic probing.
	// This loop will terminate because (1) incrementing by
	// triangular numbers will hit every slot in a power of 2 sized table
	// and (2) we always enforce at least some empty slots by resizing when needed.
	for {
		pos := group * 16
		controlBytes := t.control[pos:]
		bitmask, ok := MatchByte(h2, controlBytes)
		if debug && !ok {
			panic("short control byte slice")
		}
		for bitmask != 0 {
			// We have at least one hit on h2
			offset = bits.TrailingZeros32(bitmask)
			kv := &t.slots[int(pos)+offset]
			if kv.Key == k {
				return kv, group, offset
			}
			// TODO: is this right? The test coverage hits this, but
			// getting lower than expected false positives in benchmarks, maybe?
			// (but current benchmarks might have more conservative fill currently?)
			// m.getH2FalsePositives++ // stats.

			// continue to look. infrequent with 7 bit h2.
			// clear the bit we just checked.
			bitmask &^= 1 << offset
		}

		// No matching h2, or we had a matching h2
		// but failed to find an equal key in the loop just above.
		// Check if this group is full or has at least one empty slot.
		// TODO: call it H1 and H2, removing h2 term
		// TODO: can likely skip getting the offset below and just test bitmask > 0
		emptyBitmask, ok := MatchByte(emptySentinel, t.control[group*16:])
		if debug && !ok {
			panic("short control byte slice")
		}

		// If we have any EMPTY positions, we know the key we were
		// looking to find was never displaced outside this group
		// by quadratic probing during Set, and hence we can stop now at this group
		// (most often the key's natural group).
		if emptyBitmask != 0 {
			return nil, group, offset
		}

		// This group is full or contains STORED/DELETED without any EMPTY,
		// so continue on to the next group.
		// We don't do quadratic probing within a group, but we do
		// quadratic probing across groups.
		// Continue our quadratic probing across groups, using triangular numbers.
		// TODO: the Rust implementation uses a ProbeSeq and the later C++ also has a probe seq; could consider something similar
		// m.getExtraGroups++ // stats
		probeCount++
		group = (group + probeCount) & t.groupMask
		if debug && probeCount >= uint64(len(t.slots)/16) {
			panic(fmt.Sprintf("impossible: probeCount: %d groups: %d underlying table len: %d", probeCount, len(t.slots)/16, len(t.slots)))
		}
	}
}
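
// exampleProbeSequence is an illustrative sketch (hypothetical, not called by
// find or set) of the probe order used above: starting at a natural group, we
// advance by 1, 2, 3, ... (triangular increments), masked by groupMask. For a
// power-of-two group count this visits every group exactly once before repeating,
// which is why the probe loops above must terminate.
func exampleProbeSequence(naturalGroup, groupMask uint64) []uint64 {
	groups := make([]uint64, 0, groupMask+1)
	g := naturalGroup & groupMask
	for probeCount := uint64(0); probeCount <= groupMask; probeCount++ {
		groups = append(groups, g)
		g = (g + probeCount + 1) & groupMask
	}
	return groups
}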

// Set sets k and v within the map.
func (m *Map) Set(k Key, v Value) {
	// Write the element, incrementing the element count if needed and moving groups if needed.
	m.set(k, v, 1, true)
}

// set sets k and v within the map.
// elemIncr indicates if we should increment elemCount when populating
// a free slot. A zero elemIncr enables us to use set when evacuating,
// which does not change the number of elements.
// moveIfNeeded indicates if we should do move operations if currently growing.
func (m *Map) set(k Key, v Value, elemIncr int, moveIfNeeded bool) {
	h := m.hashFunc(k, m.seed)
	group := h & m.current.groupMask
	h2 := m.current.h2(h)

	if moveIfNeeded && m.old != nil {
		// We are growing. Move groups if needed.
		m.moveGroups(group, k, h)
	}

	var probeCount uint64
	// Do quadratic probing.
	// This loop will terminate for the same reasons as the find loop.
	for {
		bitmask, ok := MatchByte(h2, m.current.control[group*16:])
		if debug && !ok {
			panic("short control byte slice")
		}

		for bitmask != 0 {
			// We have at least one hit on h2
			offset := bits.TrailingZeros32(bitmask)
			pos := int(group*16) + offset
			kv := m.current.slots[pos]
			if kv.Key == k {
				// Update the existing key. Note we don't increment the elem count because we are replacing.
				m.current.control[pos] = h2
				m.current.slots[pos] = KV{Key: k, Value: v}
				// Track if we have any displaced elements in current while growing. This is rare.
				// TODO: This might not be a net perf win.
				if m.old != nil && probeCount != 0 {
					oldGroup := group & m.old.groupMask
					m.growStatus[oldGroup] = setCurHasDisplaced(m.growStatus[oldGroup])
				}
				return
			}

			// continue to look. infrequent with 7 bit h2.
			// clear the bit we just checked.
			bitmask &^= 1 << offset
		}

		// No matching h2, or we had a matching h2
		// but failed to find an equal key in the loop just above.
		// See if this is the end of our probe chain, which is indicated
		// by the presence of an EMPTY slot.
		emptyBitmask := matchEmpty(m.current.control[group*16:])
		if emptyBitmask != 0 {
			// We've reached the end of our probe chain without finding
			// a match on an existing key.
			if m.elemCount+m.current.deleteCount >= m.resizeThreshold && !m.disableResizing {
				// Double our size
				m.startResize()

				// Also set the key we are working on, then we are done.
				// (Simply re-using Set here causes a tiny bit of extra work when resizing;
				// we could instead let findFirstEmptyOrDeleted below handle it,
				// but we would need to at least recalc h2).
				// This is our first modification in our new table,
				// and we want to move the group(s) that correspond to this key.
				m.set(k, v, 1, true)
				return
			}

			var offset int
			if m.current.deleteCount == 0 || probeCount == 0 {
				// If we've never used a DELETED tombstone in this fixedTable,
				// the first group containing usable space is this group with its EMPTY slot,
				// which might be at the end of a probe chain, and we can use it now.
				// If instead we have DELETED somewhere but we have not just now probed beyond
				// the natural group, we can use an EMPTY slot in the natural group.
				// Either way, set the entry in this group using its first EMPTY slot.
				// TODO: double-check this is worthwhile given this
				// is an optimization that might not be in the C++ implementation?
				offset = bits.TrailingZeros32(emptyBitmask)
			} else {
				// We know there is room in the group we are on,
				// but we might have passed a usable DELETED slot during our
				// probing, so we rewind to this key's natural group and
				// probe forward from there,
				// and use the first EMPTY or DELETED slot found.
				group, offset = m.current.findFirstEmptyOrDeleted(h)
			}

			// Update the empty or deleted slot.
			pos := int(group*16) + offset
			if m.current.control[pos] == deletedSentinel {
				m.current.deleteCount--
			}
			m.current.control[pos] = h2
			m.current.slots[pos] = KV{Key: k, Value: v}
			m.elemCount += elemIncr
			// Track if we have any displaced elements in current while growing. This is rare.
			if m.old != nil && probeCount != 0 {
				oldGroup := group & m.old.groupMask
				m.growStatus[oldGroup] = setCurHasDisplaced(m.growStatus[oldGroup])
			}
			return
		}

		// We did not find an available slot.
		// We don't do quadratic probing within a group, but we do
		// quadratic probing across groups.
		// Continue our quadratic probing across groups, using triangular numbers.
		probeCount++
		group = (group + probeCount) & m.current.groupMask

		if debug && probeCount >= uint64(len(m.current.slots)/16) {
			panic(fmt.Sprintf("impossible: probeCount: %d groups: %d underlying table len: %d", probeCount, len(m.current.slots)/16, len(m.current.slots)))
		}
	}
}
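
// exampleNeedsResize is an illustrative sketch (hypothetical helper) of the
// resize trigger used in set above: live elements plus DELETED tombstones are
// compared against a threshold of 13/16 of the table size, matching the 6.5
// entries per 8-element bucket load factor used by New and calcTableSize.
func exampleNeedsResize(elemCount, deleteCount, tableSize int) bool {
	return elemCount+deleteCount >= (tableSize*13)/16
}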

// startResize begins an incremental resize: it creates a new fixedTable with
// doubled table size as current, moves the existing current to old, and sets up
// the growth tracking state. Elements are then evacuated gradually from old to
// current by later write operations (Set/Delete).
func (m *Map) startResize() {
	// prepare for a new, larger and initially empty current.
	m.resizeThreshold = m.resizeThreshold << 1
	newTableSize := len(m.current.control) << 1

	// place current in old, and create a new current
	m.old = &fixedTable{}
	*m.old = m.current
	m.current = *newFixedTable(newTableSize)

	// get ready to track our grow operation
	m.growStatus = make([]byte, len(m.old.control))
	m.sweepCursor = 0

	// TODO: temp stat for now
	m.resizeGenerations++
}
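
// exampleDoubledParams is an illustrative sketch (hypothetical helper) of what
// doubling the table size in startResize does to the derived fields computed by
// newFixedTable: with 16 control bytes per group, doubling the table doubles the
// group count, widens groupMask by one bit, and increases h2Shift by one, so h2
// is taken from the next-higher 7 bits of the hash in the new current table.
func exampleDoubledParams(oldTableSize int) (groupMask uint64, h2Shift uint8) {
	newTableSize := oldTableSize << 1
	groupMask = uint64(newTableSize)/16 - 1
	h2Shift = uint8(bits.TrailingZeros(uint(newTableSize / 16)))
	return groupMask, h2Shift
}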

// moveGroups takes a group in current along with a
// key that is triggering the move. It only expects to be called
// while growing. It moves up to three groups:
//  1. the natural group for this key
//  2. the group this key is located in, if it is displaced in old from its natural group
//  3. groups swept incrementally from the front of old, to ensure we eventually
//     finish and don't miss any groups
func (m *Map) moveGroups(group uint64, k Key, h uint64) {
	allowedMoves := 2

	// First, if the natural group for this key has not been moved, move it
	oldNatGroup := group & m.old.groupMask
	if !isEvacuated(m.growStatus[oldNatGroup]) {
		m.moveGroup(oldNatGroup)
		allowedMoves--
	}

	if !isChainEvacuated(m.growStatus[oldNatGroup]) {
		// Walk the chain that started at the natural group, moving any unmoved groups as we go.
		// If we move the complete chain, we mark the natural group as ChainEvacuated with moveChain.
		// The first group we'll visit is the one after the natural group (probeCount of 1).
		var chainEnd bool
		allowedMoves, chainEnd = m.moveChain(oldNatGroup, 1, allowedMoves)

		// We walked the chain as far as we could.
		if !chainEnd {
			// Rare case.
			// Our key might be displaced from its natural group in old,
			// and we did not complete the chain, so we might not have
			// reached the actual group with the key.
			// We rely elsewhere (such as in Get) upon always moving the actual group
			// containing the key when an existing key is Set/Deleted.
			// Find the key. Note that we don't need to recompute the hash.
			kv, oldDisplGroup, _ := m.find(m.old, k, h)
			if kv != nil && oldDisplGroup != oldNatGroup {
				if !isEvacuated(m.growStatus[oldDisplGroup]) {
					// Not moved yet, so move it.
					// TODO: non-fuzzing tests don't hit this. fuzzing hasn't reached this branch either (so far).
					m.moveGroup(oldDisplGroup)
					allowedMoves-- // Can reach -1 here. Rare, should be ok.
				}
			}
		}
	}

	stopCursor := uint64(len(m.old.control)) / 16
	if stopCursor > m.sweepCursor+1000 {
		stopCursor = m.sweepCursor + 1000
	}
	for m.sweepCursor < stopCursor {
		// Walk up to N groups looking for something to move and/or to mark ChainEvacuated.
		// The sweepCursor group is marked ChainEvacuated if we evac through the end of the chain.
		// The majority of the time, sweepCursor is a singleton chain or is otherwise the end of a chain.
		if !isChainEvacuated(m.growStatus[m.sweepCursor]) {
			allowedMoves, _ = m.moveChain(m.sweepCursor, 0, allowedMoves)
		}
		if isChainEvacuated(m.growStatus[m.sweepCursor]) {
			m.sweepCursor++
			continue
		}
		if allowedMoves <= 0 {
			break
		}
	}

	// Check if we are now done
	if m.sweepCursor >= (uint64(len(m.old.control)) / 16) {
		// Done growing!
		// TODO: we have some test coverage of this, but would be nice to have more explicit test
		// TODO: maybe extract a utility func
		m.old = nil
		m.growStatus = nil
		m.sweepCursor = 0
	}
}

// moveChain walks a probe chain that starts at a natural group, moving unmoved groups.
// The probeCount parameter allows it to begin in the middle of a walk.
// moveChain returns the number of remaining allowedMoves and a bool indicating
// if the end of the chain has been reached.
// Each moved group is marked as being evacuated, and if a chain is completely
// evacuated, the starting natural group is marked ChainEvacuated.
func (m *Map) moveChain(oldNatGroup uint64, probeCount uint64, allowedMoves int) (int, bool) {
	g := (oldNatGroup + probeCount) & m.old.groupMask

	for allowedMoves > 0 {
		if !isEvacuated(m.growStatus[g]) {
			// Evacuate.
			m.moveGroup(g)
			allowedMoves--
		}
		if matchEmpty(m.old.control[g*16:]) != 0 {
			// Done with the chain. Record that.
			m.growStatus[oldNatGroup] = setChainEvacuated(m.growStatus[oldNatGroup])
			// chainEnd is true
			return allowedMoves, true
		}
		probeCount++
		g = (g + probeCount) & m.old.groupMask
	}
	return allowedMoves, false
}

// moveGroup takes a group in old, and moves it to current.
// It only moves that group, and does not cascade to other groups
// (even if moving the group writes displaced elements to other groups).
func (m *Map) moveGroup(group uint64) {
	for offset, b := range m.old.control[group*16 : group*16+16] {
		if isStored(b) {
			// TODO: cleanup
			kv := m.old.slots[group*16+uint64(offset)]

			// We are re-using the set mechanism to write to
			// current, but we don't want cascading moves of other groups
			// based on this write, so moveIfNeeded is false.
			// TODO: m.set does a little more work than strictly required,
			// including we know the key is not present in current yet, so we could avoid MatchByte(h2) and
			// some other logic.
			m.set(kv.Key, kv.Value, 0, false)
		}
	}
	// Mark it evacuated.
	m.growStatus[group] = setEvacuated(m.growStatus[group])

	if matchEmpty(m.old.control[group*16:]) != 0 {
		// The probe chain starting at this group ends at this group,
		// so we can also mark it ChainEvacuated.
		m.growStatus[group] = setChainEvacuated(m.growStatus[group])
	}
}

// Delete deletes k from the map, if present.
func (m *Map) Delete(k Key) {
	// TODO: make a 'delete' with moveIfNeeded

	h := m.hashFunc(k, m.seed)
	group := h & m.current.groupMask
	if m.old != nil {
		// We are growing. Move groups if needed.
		// TODO: don't yet have a test that hits this (Delete while growing)
		m.moveGroups(group, k, h)
	}

	kv, group, offset := m.find(&m.current, k, h)
	if kv == nil {
		return
	}

	// Mark the existing key's position as DELETED or EMPTY.
	// In the common case we can set this position back to EMPTY.
	var sentinel byte = emptySentinel

	// However, we need to check if there are any EMPTY positions in this group
	emptyBitmask, ok := MatchByte(emptySentinel, m.current.control[group*16:])
	if debug && !ok {
		panic("short control byte slice")
	}
	if emptyBitmask == 0 {
		// We must use a DELETED tombstone because there are no remaining
		// positions marked EMPTY (which means there might have been displacement
		// past this group in the past by quadratic probing, and hence we use tombstones to make
		// sure we follow any displacement chain properly in any future operations).
		sentinel = deletedSentinel
		m.current.deleteCount++
	}

	pos := int(group*16) + offset
	m.current.control[pos] = sentinel
	// TODO: for a pointer, would want to set nil. could do with 'zero' generics func.
	m.current.slots[pos] = KV{}
	m.elemCount--
}

// matchEmptyOrDeleted checks if the first 16 bytes of controlBytes have
// any empty or deleted sentinels, returning a bitmask of the corresponding offsets.
// TODO: can optimize this via SSE (e.g., check high bit via _mm_movemask_epi8 or similar).
func matchEmptyOrDeleted(controlBytes []byte) uint32 {
	emptyBitmask, ok := MatchByte(emptySentinel, controlBytes)
	deletedBitmask, ok2 := MatchByte(deletedSentinel, controlBytes)
	if debug && !(ok && ok2) {
		panic("short control byte slice")
	}
	return emptyBitmask | deletedBitmask
}

// matchEmpty checks if the first 16 bytes of controlBytes have
// any empty sentinels, returning a bitmask of the corresponding offsets.
func matchEmpty(controlBytes []byte) uint32 {
	emptyBitmask, ok := MatchByte(emptySentinel, controlBytes)
	if debug && !ok {
		panic("short control byte slice")
	}
	return emptyBitmask
}
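
// exampleMatchHighBit is an illustrative, pure Go sketch (hypothetical, not used
// by matchEmptyOrDeleted) of the optimization suggested in the TODO on
// matchEmptyOrDeleted above: EMPTY and DELETED are the only control byte values
// with the high bit set, so a single high-bit scan of the 16 control bytes
// matches either sentinel without two separate MatchByte calls.
func exampleMatchHighBit(controlBytes []byte) uint32 {
	var bitmask uint32
	for i := 0; i < 16; i++ {
		if controlBytes[i]&0x80 != 0 {
			bitmask |= 1 << i
		}
	}
	return bitmask
}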

// Range calls f for each key and value present in the map.
// If f returns false, Range stops the iteration.
func (m *Map) Range(f func(key Key, value Value) bool) {
	// We iterate over snapshots of old and current tables, looking up
	// the golden data in the live tables as needed. It might be that the live
	// tables have a different value, or the live tables might have deleted the key,
	// both of which we must respect at the moment we emit a key/value during iteration.
	// However, we are not obligated to iterate over all the keys in the
	// live tables -- we are allowed to emit a key added after iteration start, but
	// are not required to do so.
	//
	// When iterating over our snapshot of old, we emit all keys encountered that are
	// still present in the live tables. We then iterate over our snapshot of current,
	// but skip any key present in the immutable old snapshot to avoid duplicates.
	//
	// In some cases, we can emit without a lookup, but in other cases we need to do a
	// lookup in another table. We have some logic to minimize rehashing. While iterating
	// over old, we typically need to rehash keys in evacuated groups, but while iterating
	// over current, the common case is we do not need to rehash even to do a lookup.
	//
	// A Set or Delete is allowed during an iteration (e.g., a Set within the user's code
	// invoked by Range might cause growth to start or finish), but not concurrently.
	// For example, iterating while concurrently calling Set from another goroutine
	// would be a user-level data race (similar to runtime maps).
	//
	// TODO: clean up comments and add better intro.
	// TODO: make an iter struct, with a calling sequence like iterstart and iternext

	// Begin by storing some snapshots of our tables.
	// For example, another m.old could appear later if a
	// new grow starts after this iterator starts.
	// We want to iterate over the old that we started with.
	// Note that old is immutable once we start growing.
	// TODO: maybe gather these, such as:
	// type iter struct { old, growStatus, current, oldPos, curPos, ... }
	old := m.old
	growStatus := m.growStatus

	// A new m.current can also be created mid iteration, so snapshot
	// it as well so that we can iterate over the current we started with.
	cur := m.current
	curControl := m.current.control[:] // TODO: maybe not needed, and/or collapse these?
	curSlots := m.current.slots[:]     // TODO: same

	// Below, we pick a random starting group and starting offset within that group.
	r := (uint64(fastrand()) << 32) | uint64(fastrand())
	if m.seed == 0 || m.seed == 42 {
		// TODO: currently forcing repeatability for some tests, including fuzzing, but eventually remove
		r = 0
	}

	// Now, iterate over our snapshot of old.
	if old != nil {
		for i, group := 0, r&old.groupMask; i < len(old.control)/16; i, group = i+1, (group+1)&old.groupMask {
			offsetMask := uint64(0x0F)
			for j, offset := 0, (r>>61)&offsetMask; j < 16; j, offset = j+1, (offset+1)&offsetMask {
				pos := group*16 + offset
				// Iterate over control bytes individually for now.
				// TODO: consider 64-bit check of control bytes or SSE operations (e.g., _mm_movemask_epi8).
				if isStored(old.control[pos]) {
					k := old.slots[pos].Key

					// We don't need to worry about displacements here when checking
					// evacuation status. (We are iterating over each control byte, wherever they have landed).
					if !isEvacuated(growStatus[pos/16]) {
						// Not evac. Because we always move both a key's natural group
						// and the key's displaced group for any Set or Delete, not evac means
						// we know nothing in this group has ever
						// been written or deleted in current, which means
						// the key/value here in old are the golden data,
						// which we use now. (If grow had completed, or if there
						// have been multiple generations of growing, our snapshot
						// of old will have everything evacuated).
						// TODO: current non-fuzzing tests don't hit this. fuzzing does ;-)
						cont := f(k, old.slots[pos].Value)
						if !cont {
							return
						}
						continue
					}

					// Now we handle the evacuated case. This key at one time was moved to current.
					// Check where the golden data resides now, and emit the live key/value if they still exist.
					// TODO: could probably do less work, including avoiding lookup/hashing in some cases

					if cur.groupMask == m.current.groupMask || m.old == nil {
						// We are still in the same grow as when the iter started,
						// or that grow is finished and we are not in the middle
						// of a different grow, so we don't need to look in m.old
						// (because this elem is already evacuated, or m.old doesn't exist),
						// and hence can just look in m.current.
						kv, _, _ := m.find(&m.current, k, m.hashFunc(k, m.seed))
						if kv != nil {
							cont := f(kv.Key, kv.Value)
							if !cont {
								return
							}
						}
						continue
					}

					// We are in the middle of a grow that is different from the grow at iter start.
					// In other words, m.old is now a "new" old.
					// Do a full Get, which looks in the live m.current or m.old as needed.
					v, ok := m.Get(k)
					if !ok {
						// Group was evacuated, but the key is not there now, so we don't emit anything.
						continue
					}
					// Key exists in live m.current, or possibly live m.old. Emit that copy.
					// TODO: for floats, handle -0 vs. +0 (https://go.dev/play/p/mCN_sddUlG9)
					cont := f(k, v)
					if !cont {
						return
					}
					continue
				}
			}
		}
	}

	// No old, or we've reached the end of old.
	// We now iterate over our snapshot of current, but we will skip anything present in
	// the immutable old because it would have already been processed above.
	loopMask := uint64(len(curControl)/16 - 1)
	for i, group := 0, r&loopMask; i < len(curControl)/16; i, group = i+1, (group+1)&loopMask {
		offsetMask := uint64(0x0F)
		for j, offset := 0, (r>>61)&offsetMask; j < 16; j, offset = j+1, (offset+1)&offsetMask {
			pos := group*16 + offset
			if isStored(curControl[pos]) {
				curGroup := uint64(pos / 16)
				k := curSlots[pos].Key

				if old != nil {
					// We are about to look in old, but first, compute the hash for this key (frequently cheaply).
					var h uint64
					if !curHasDisplaced(growStatus[curGroup&old.groupMask]) {
						// During a grow, we track when a group contains a displaced element.
						// The group we are on does not have any displaced elements, which means
						// we can reconstruct the useful portion of the hash from the group and h2.
						// This could help with cases like https://go.dev/issue/51410 when a map
						// is in a growing state for an extended period.
						// TODO: check cost and if worthwhile
						h = cur.reconstructHash(curControl[pos], curGroup)
					} else {
						// Rare that a group in current would have displaced elems during a grow,
						// but it means we must recompute the hash from scratch.
						h = m.hashFunc(k, m.seed)
					}

					// Look in old
					kv, _, _ := m.find(old, k, h)
					if kv != nil {
						// This key exists in the immutable old, so it was already handled above in our loop over old.
						continue
					}
				}

				// The key was not in old or there is no old. If the key is still live, we will emit it.
				// Start by checking if m.current is the same as the snapshot of current we are iterating over.
				if cur.groupMask == m.current.groupMask {
					// They are the same, so we can simply emit from the snapshot.
					cont := f(k, curSlots[pos].Value)
					if !cont {
						return
					}
					continue
				}

				// Additional grows have happened since we started, so we need to check m.current and
				// possibly a new m.old if needed, which is all handled by Get.
				// TODO: could pass in the reconstructed hash here as well, though this is a rarer case compared to
				// writes stopping and a map being "stuck" in the same growing state forever or for a long time.
				v, ok := m.Get(k)
				if !ok {
					// key not there now, so we don't emit anything
					continue
				}
				// Key exists in live current, or possibly live old. Emit.
				// TODO: for floats, handle -0 vs. +0
				cont := f(k, v)
				if !cont {
					return
				}
				continue
			}
		}
	}
}
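
// exampleRangeWithMutation is an illustrative sketch (hypothetical) mirroring the
// comments at the top of Range: a Set or Delete from inside the Range callback is
// allowed (and may start or finish a grow), whereas calling Set or Delete
// concurrently from another goroutine during a Range is not.
func exampleRangeWithMutation(m *Map) {
	m.Range(func(k Key, v Value) bool {
		if v < 0 {
			m.Delete(k) // allowed: same goroutine, during the iteration
		}
		return true // keep iterating
	})
}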

// isStored reports whether controlByte indicates a stored value.
// If the leading bit is 0, it means there is a valid value in the corresponding
// slot in the table. (The next 7 bits are the h2 value).
// TODO: maybe isStored -> hasStored or similar?
func isStored(controlByte byte) bool {
	return controlByte&(1<<7) == 0
}

// isEvacuated reports whether the group corresponding to statusByte
// has been moved from old to new.
// Note: this is just for the elements stored in that group in old,
// and does not mean all elements displaced from that group have been evacuated.
// TODO: collapse these flags down into fewer bits rather than using a full byte
// TODO: maybe make a type
func isEvacuated(statusByte byte) bool {
	return statusByte&(1<<0) != 0
}

func setEvacuated(statusByte byte) byte {
	return statusByte | (1 << 0)
}

// isChainEvacuated is similar to isEvacuated, but reports whether the group
// corresponding to statusByte has been moved from old to new
// along with any probe chains that originate from that group.
// A group that does not have any chains originating from it can have isChainEvacuated true.
func isChainEvacuated(statusByte byte) bool {
	return statusByte&(1<<1) != 0
}

func setChainEvacuated(statusByte byte) byte {
	return statusByte | (1 << 1)
}

// curHasDisplaced indicates the group in current has displaced elements.
// It is only tracked during grow operations, and therefore is
// only very rarely set. If we are mid-grow, it means current was recently
// doubled in size and has not yet had enough elems added to complete the grow.
// TODO: verify this is a performance win for range
// TODO: consider oldHasDisplaced, but might be less of a win
// (additional bookkeeping, likely higher mispredictions than curHasDisplaced, ...).
func curHasDisplaced(statusByte byte) bool {
	return statusByte&(1<<2) != 0
}

func setCurHasDisplaced(statusByte byte) byte {
	return statusByte | (1 << 2)
}
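
// exampleGrowStatus is an illustrative sketch (hypothetical helper) showing how
// the growth status bits manipulated by the helpers above are laid out within a
// single growStatus byte: bit 0 is Evacuated, bit 1 is ChainEvacuated, and bit 2
// is CurHasDisplaced. A fully processed group during a grow typically ends up
// with the low two bits set.
func exampleGrowStatus() byte {
	var s byte
	s = setEvacuated(s)      // bit 0: this group's stored entries moved to current
	s = setChainEvacuated(s) // bit 1: the probe chain starting here fully moved
	// setCurHasDisplaced (bit 2) is only set for the rare displaced-in-current case.
	return s // 0b0000_0011
}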

// Len returns the number of elements stored in the Map.
// (We track this count explicitly in elemCount).
func (m *Map) Len() int {
	return m.elemCount
}

// newFixedTable returns a *fixedTable that is ready to use.
// A fixedTable can be copied.
func newFixedTable(tableSize int) *fixedTable {
	// TODO: not using capacity in our make calls. Probably reasonable for straight swisstable impl?

	if tableSize&(tableSize-1) != 0 || tableSize == 0 {
		panic(fmt.Sprintf("table size %d is not power of 2", tableSize))
	}

	slots := make([]KV, tableSize)
	control := make([]byte, tableSize)
	// Initialize all control bytes to empty.
	// TODO: consider using 0x00 for empty, or unroll, or set these with unsafe, or...
	// A simple loop here is ~15% of the time to construct a large capacity empty table.
	for i := range control {
		control[i] = emptySentinel
	}

	return &fixedTable{
		control: control,
		slots:   slots,
		// 16 control bytes per group, table length is a power of 2
		groupMask: (uint64(tableSize) / 16) - 1,
		// h2Shift gives h2 as the next 7 bits just above the group mask.
		// (It is not the top 7 bits, which is what the runtime map uses).
		// TODO: small sanity check of h2Shift; maybe make a test: https://go.dev/play/p/DjmN7O4YrWI
		h2Shift: uint8(bits.TrailingZeros(uint(tableSize / 16))),
	}
}

func (t *fixedTable) findFirstEmptyOrDeleted(h uint64) (group uint64, offset int) {
	group = h & t.groupMask

	// Do quadratic probing.
	var probeCount uint64
	for {
		bitmask := matchEmptyOrDeleted(t.control[group*16:])
		if bitmask != 0 {
			// We have at least one hit
			offset = bits.TrailingZeros32(bitmask)
			return group, offset
		}

		// No matching empty or deleted control byte.
		// Keep probing to the next group. (It's a bug if the whole table
		// does not contain any empty or deleted positions).
		probeCount++
		group = (group + probeCount) & t.groupMask
		if debug && probeCount >= uint64(len(t.slots)/16) {
			panic(fmt.Sprintf("impossible: probeCount: %d groups: %d underlying table len: %d", probeCount, len(t.slots)/16, len(t.slots)))
		}
	}
}

// h2 returns the 7 bits immediately above the bits covered by the table's groupMask.
func (t *fixedTable) h2(h uint64) uint8 {
	// TODO: does an extra mask here elim a shift check in the generated code?
	return uint8((h >> uint64(t.h2Shift)) & 0x7f)
}
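
// exampleHashBitsRoundTrip is an illustrative sketch (hypothetical helper) of the
// identity that reconstructHash below relies on: for a stored key, the group index
// is the hash masked by groupMask and the control byte holds the next 7 bits, so
// combining them recovers exactly the hash bits that find and h2 consume.
func exampleHashBitsRoundTrip(t *fixedTable, h uint64) bool {
	group := h & t.groupMask
	ctrl := t.h2(h) // the control byte that would be stored for this hash (high bit 0)
	recovered := t.reconstructHash(ctrl, group)
	usedBits := t.groupMask | (uint64(0x7F) << uint64(t.h2Shift))
	return recovered == h&usedBits
}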

// reconstructHash reconstructs the bits of the original hash covered by
// the table's groupMask plus an additional 7 bits. In other words, it reconstructs
// the bits that we use elsewhere for h2 and the group (h1). It assumes
// controlByte contains the h2 (that is, that it corresponds to a stored position).
// TODO: runtime map might be able to use this approach?
func (t *fixedTable) reconstructHash(controlByte byte, group uint64) uint64 {
	return group | ((uint64(controlByte) & 0x7F) << uint64(t.h2Shift))
}

// calcTableSize returns the length to use
// for the storage slices to support
// capacityHint stored map elements.
func calcTableSize(capacityHint int) int {
	// For now, follow Go maps with a max of 6.5 entries per 8-elem bucket,
	// which is an 81.25% max load factor, rounded up to a power of 2.
	// Our current minimum size is 16.
	tableSize := int(float64(capacityHint) / (6.5 / 8))
	pow2 := 16
	// TODO: clip max
	for tableSize > pow2 {
		pow2 = pow2 << 1
	}
	tableSize = pow2

	// sanity check power of 2
	if tableSize&(tableSize-1) != 0 || tableSize == 0 {
		panic("impossible")
	}
	return tableSize
}

func zeroKey() Key {
	return Key(0)
}

func zeroValue() Value {
	return Value(0)
}

func hashUint64(k Key, seed uintptr) uint64 {
	// earlier: uint64(memhash(unsafe.Pointer(&k), seed, uintptr(8)))
	return uint64(memhash64(unsafe.Pointer(&k), seed))
}

func hashString(s string, seed uintptr) uint64 {
	return uint64(strhash(unsafe.Pointer(&s), seed))
}

//go:linkname memhash runtime.memhash
//go:noescape
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr

//go:linkname memhash64 runtime.memhash64
//go:noescape
func memhash64(p unsafe.Pointer, seed uintptr) uintptr

//go:linkname strhash runtime.strhash
//go:noescape
func strhash(p unsafe.Pointer, h uintptr) uintptr

// TODO: fastrand64 did not initially work
//go:linkname fastrand runtime.fastrand
func fastrand() uint32

func init() {
	if runtime.GOARCH != "amd64" {
		// The assembly is amd64-only without a pure Go fallback yet.
		// Also, we are ignoring 32-bit in several places.
		panic("only amd64 is supported")
	}
}

const debug = false
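
// matchByteFallback is an illustrative, pure Go sketch of a portable fallback for
// the amd64-only MatchByte assembly used throughout this file (the name and the
// idea of a fallback are hypothetical; see the init check above). It reports which
// of the first 16 control bytes equal b, as a bitmask of offsets, plus whether the
// slice was long enough. A real fallback would more likely use SWAR on two uint64
// words rather than a byte-at-a-time loop.
func matchByteFallback(b byte, controlBytes []byte) (uint32, bool) {
	if len(controlBytes) < 16 {
		return 0, false
	}
	var bitmask uint32
	for i := 0; i < 16; i++ {
		if controlBytes[i] == b {
			bitmask |= 1 << i
		}
	}
	return bitmask, true
}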