github.com/thepudds/swisstable@v0.0.0-20221011152303-9c77dc657777/map.go

package swisstable

import (
	"fmt"
	"math/bits"
	"runtime"
	"unsafe"
)

// Basic terminology:
// map: overall data structure, internally organized into groups.
// group: a set of 16 contiguous positions that can be examined in parallel.
// position: index within the overall linear table. Each position has a slot & control byte.
// slot: stores one key/value.
// control byte: metadata about a particular slot, including whether empty, deleted, or has a stored value.
// offset: index within a group.
// H1: hash(key) % group count. Corresponds to the natural (non-displaced) group for a given key.
// H2: 7 additional bits from hash(key). Stored in the control byte.
// count: number of live key/values. Returned via Len.
// table size: len(slots).
//
// Individual positions can be EMPTY, DELETED, or STORED (containing a key/value).
//
// In addition, internally there is a fixedTable type that is a non-resizable Swisstable.
// Map manages a current fixedTable, and when doing incremental growth, an old fixedTable.
// During write operations to Map (Set/Delete), the old fixedTable is gradually
// evacuated to the current fixedTable.
//
// Incremental growth without invalidating iterators presents some challenges, in part
// because a Swisstable can mark control bytes as EMPTY or DELETED to ensure probing chains
// across groups are correctly followed to find any displaced elements. This must be
// properly navigated when juggling an old and new table.
//
// The basic approach is to maintain an immutable old once growth starts, along with
// some growth status bytes that are live for the duration of the growth, with one
// byte per group. (This can be collapsed down to fewer bits, but we use a full byte for now).
// Even with the extra growth status bytes, this still uses less memory than the runtime map,
// which allocates extra overflow buckets that exceed the size of the growth status bytes
// even for small key/values.
//
// If an iterator starts mid-growth, it walks both the old and new table, taking care
// not to emit the same key twice. If growth completes, the iterator continues to walk
// the old and new tables it started with. In both cases, it checks the live tables if needed to
// get the live golden data. It attempts to avoid re-hashing in some cases by reconstructing
// the hash from the group and the 7 bits of stored h2. See the Range method for details.
// (I think it re-hashes less than the runtime map iterator. TODO: confirm).

// Key, Value, and KV define our key and value types.
// TODO: these are placeholder types for performance testing prior to using generics.
type Key int64
type Value int64
type KV struct {
	Key   Key
	Value Value
}

type hashFunc func(k Key, seed uintptr) uint64
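
// The sketch below is illustrative only (exampleUsage is a hypothetical helper,
// not part of the API): it shows how the exported New, Set, Get, Delete, Range,
// and Len defined in this file are intended to be used together.
func exampleUsage() {
	m := New(100) // capacity hint, treated as "at least"
	for i := Key(0); i < 10; i++ {
		m.Set(i, Value(i*i))
	}
	if v, ok := m.Get(3); ok {
		fmt.Println("key 3 has value", v)
	}
	m.Delete(3)
	m.Range(func(k Key, v Value) bool {
		fmt.Println(k, "->", v)
		return true // returning false would stop the iteration
	})
	fmt.Println("len:", m.Len())
}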

// Control byte special values.
// If the high bit is 1, it is a special sentinel value of EMPTY or DELETED.
// If the high bit is 0, there is a STORED entry in the corresponding
// slot in the table, and the next 7 bits are the h2 value. (This is called 'FULL'
// in the original C++ swisstable implementation, but we call it STORED).
// TODO: consider flipping meaning of first bit, possibly with 0x00 for empty and 0x7F for deleted?
const emptySentinel = 0b1111_1111
const deletedSentinel = 0b1000_0000

// Map is a map, supporting Set, Get, Delete, Range and Len.
// It is implemented via a modified Swisstable.
// Unlike the original C++ Swisstable implementation,
// Map supports incremental resizing without invalidating iterators.
type Map struct {
	// Internally, a Map manages one or two fixedTables to store key/values. Normally,
	// it manages one fixedTable. While growing, it manages two fixedTables.

	// current is a fixedTable containing the element array and metadata for the active fixedTable.
	// Write operations (Set/Delete) on Map go to current.
	current fixedTable

	// old is only used during incremental growth.
	// When growth starts, we move current to old, and no longer write or delete key/values in old,
	// but instead gradually evacuate old to new on write operations (Set/Delete).
	// Get and Range handle finding the correct "golden" data in either current or old.
	old *fixedTable

	// growStatus tracks what has happened on a group-by-group basis.
	// To slightly simplify, currently each group gets a byte. TODO: could collapse that down to a few bits.
	growStatus []byte

	// sweepCursor tracks how far the incremental sweep of old has progressed, in groups.
	sweepCursor uint64

	// elemCount tracks the live count of key/values, and is returned by Len.
	elemCount int

	// When resizeThreshold is passed, we need to resize.
	// TODO: need to track DELETED count as well for resizing or compacting
	resizeThreshold int

	// currently for testing, we purposefully fill beyond the resizeThreshold.
	// TODO: remove
	disableResizing bool

	// Our hash function, which generates a 64-bit hash.
	hashFunc hashFunc
	seed     uintptr

	// Flags tracking state.
	// TODO: collapse down to single flag variable
	// TODO: could use these flags to indicate OK to clear during evac
	// haveIter bool
	// haveOldIter bool

	// Internal stats to help observe behavior.
	// TODO: eventually remove stats, not actively tracking some
	gets                int
	getH2FalsePositives int
	getExtraGroups      int
	resizeGenerations   int
}

// New returns a *Map that is ready to use.
// capacity is a hint, and is treated as "at least".
func New(capacity int) *Map {
	// tableSize will be roughly 1/0.8 x the user suggested capacity,
	// rounded up to a power of 2.
	// TODO: for now, should probably make capacity be at least 16 (group size)
	// to temporarily simplify handling small maps (where small here is < 16).
	tableSize := calcTableSize(capacity)

	current := *newFixedTable(tableSize)

	// TODO: for now, use same fill factor as the runtime map to
	// make it easier to compare performance across different sizes.
	resizeThreshold := (tableSize * 13) / 16 // TODO: centralize
	return &Map{
		current:         current,
		hashFunc:        hashUint64,
		seed:            uintptr(fastrand())<<32 | uintptr(fastrand()),
		resizeThreshold: resizeThreshold,
	}
}

// fixedTable does not support resizing.
type fixedTable struct {
	control []byte
	slots   []KV
	// groupCount int // TODO: consider using this, but maybe instead compare groupMask?
	groupMask uint64
	h2Shift   uint8

	// track our count of deletes, which we use when determining when to resize
	// TODO: dropping deletes without resize, or same size grow
	// if zero, we can skip some logic in some operations
	// TODO: check if that is a perf win
	deleteCount int
}
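
// The two helpers below are an illustrative sketch only (hypothetical names, not
// used elsewhere in this file) of the indexing arithmetic described in the
// terminology comment at the top of the file: a fixedTable is a sequence of
// 16-wide groups, the natural group for a hash is the hash masked by groupMask,
// and a position into control/slots is group*16 + offset.
func exampleNaturalGroup(t *fixedTable, h uint64) uint64 {
	return h & t.groupMask // H1: the natural (non-displaced) group for this hash
}

func examplePosition(group uint64, offset int) int {
	return int(group)*16 + offset // index into both t.control and t.slots
}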

// TODO: pick a key/value layout. Within the slots, our current layout is KV|KV|KV|KV|..., vs.
// the runtime's layout, which uses unsafe to access K|K|K|K|...|V|V|V|V|... per 8-elem bucket. That is more compact
// if K & V are not aligned, but equally compact if they are aligned.
// If we ignore alignment, our current layout might have better cache behavior,
// given high confidence that loading a key (for example, during a lookup) means you are about
// to access the adjacent value (which for typical key sizes would be in the same or an adjacent cache line).
// The Folly F14 layout, though, is probably better overall than the runtime layout or our current layout.
// (F14FastMap picks between values inline vs. values packed in a contiguous array based on entry size:
// https://github.com/facebook/folly/blob/main/folly/container/F14.md#f14-variants )
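
// Hypothetical sketch (not used by this implementation) of the split layout
// alternative mentioned in the comment above, roughly in the spirit of the
// runtime's bucket layout: keys and values stored in separate per-group arrays
// rather than as adjacent KV pairs.
type exampleSplitGroup struct {
	control [16]byte
	keys    [16]Key
	values  [16]Value
}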

// Get returns the value stored for k, and reports whether k was present.
func (m *Map) Get(k Key) (v Value, ok bool) {
	h := m.hashFunc(k, m.seed)

	if m.old == nil || isChainEvacuated(m.growStatus[h&m.old.groupMask]) {
		// We are either not growing, which is the simple case, and we
		// can just look in m.current, or we are growing but we have
		// recorded that any keys with the natural group of this key
		// have already been moved to m.current, which also means we
		// can just look in m.current.
		kv, _, _ := m.find(&m.current, k, h)
		if kv != nil {
			return kv.Value, true
		}
		return zeroValue(), false
	}

	// We are growing.
	// TODO: maybe extract to findGrowing or similar. Would be nice to do midstack inlining for common case.
	oldNatGroup := h & m.old.groupMask
	oldNatGroupEvac := isEvacuated(m.growStatus[oldNatGroup])
	table := &m.current
	if !oldNatGroupEvac {
		// The key has never been written/deleted in current since this grow started
		// (because we always move the natural group when writing/deleting a key while growing).
		table = m.old
	}
	kv, _, _ := m.find(table, k, h)
	if kv != nil {
		// Hit
		return kv.Value, true
	}
	if !oldNatGroupEvac {
		// Miss in old, and the key has never been written/deleted in current since grow started,
		// so this is a miss for the overall map.
		return zeroValue(), false
	}

	// We had a miss in current, and the old natural group was evacuated,
	// but it is not yet conclusive if we have an overall miss. For example,
	// perhaps a displaced key in old was moved to current and later deleted, or
	// perhaps a displaced key was never moved to current and the golden copy is still in old.
	// Side note: for any mid-growth map, the majority of groups are one of (a) not yet evacuated, or
	// (b) evacuated and this Get is for a non-displaced key (because most keys are not displaced),
	// so the work we did above handled that majority of groups.
	// Now we do more work for less common cases.

	oldKv, oldDisplGroup, _ := m.find(m.old, k, h)
	if oldNatGroup == oldDisplGroup {
		// We already know from above that this group was evacuated,
		// which means if there was a prior matching key in this group,
		// it would have been evacuated to current.
		// Given it is not in current now, this is a miss for the overall map.
		return zeroValue(), false
	}
	if oldKv != nil && !isEvacuated(m.growStatus[oldDisplGroup]) {
		// Hit for the overall map. This is a group with a displaced matching key, and
		// we've never written/deleted this key since the grow started,
		// so the golden copy is in old.
		// (This is an example of us currently relying on always evacuating the displaced key
		// on write/delete).
		// TODO: no non-fuzzing test hits this. might require longer probe chain. the fuzzing might hit.
		return oldKv.Value, true
	}
	// Miss. The displaced group was evacuated to current, but current doesn't have the key.
	return zeroValue(), false
}

// find searches the fixedTable for a key.
// For a hit, group is the location of the key, and offset is the location within the group.
// For a miss, group is the last probed group.
func (m *Map) find(t *fixedTable, k Key, h uint64) (kv *KV, group uint64, offset int) {
	// TODO: likely giving up some performance by sharing find between Get and Delete
	group = h & t.groupMask
	h2 := t.h2(h)

	// TODO: could try hints to elim some bounds check below with additional masking? maybe:
	// controlLenMask := len(m.current.control) - 1
	// slotsLenMask := len(m.current.slots) - 1

	var probeCount uint64

	// Do quadratic probing.
	// This loop will terminate because (1) incrementing by
	// triangular numbers will hit every slot in a power of 2 sized table
	// and (2) we always enforce at least some empty slots by resizing when needed.
	for {
		pos := group * 16
		controlBytes := t.control[pos:]
		bitmask, ok := MatchByte(h2, controlBytes)
		if debug && !ok {
			panic("short control byte slice")
		}
		for bitmask != 0 {
			// We have at least one hit on h2
			offset = bits.TrailingZeros32(bitmask)
			kv := &t.slots[int(pos)+offset]
			if kv.Key == k {
				return kv, group, offset
			}
			// TODO: is this right? The test coverage hits this, but
			// getting lower than expected false positives in benchmarks, maybe?
			// (but current benchmarks might have more conservative fill currently?)
			// m.getH2FalsePositives++ // stats.

			// continue to look. infrequent with 7 bit h2.
			// clear the bit we just checked.
			bitmask &^= 1 << offset
		}

		// No matching h2, or we had a matching h2
		// but failed to find an equal key in the loop just above.
		// Check if this group is full or has at least one empty slot.
		// TODO: call it H1 and H2, removing h2 term
		// TODO: can likely skip getting the offset below and just test bitmask > 0
		emptyBitmask, ok := MatchByte(emptySentinel, t.control[group*16:])
		if debug && !ok {
			panic("short control byte slice")
		}

		// If we have any EMPTY positions, we know the key we were
		// looking to find was never displaced outside this group
		// by quadratic probing during Set, and hence we can stop now at this group
		// (most often the key's natural group).
		if emptyBitmask != 0 {
			return nil, group, offset
		}

		// This group is full or contains STORED/DELETED without any EMPTY,
		// so continue on to the next group.
		// We don't do quadratic probing within a group, but we do
		// quadratic probing across groups.
		// Continue our quadratic probing across groups, using triangular numbers.
		// TODO: the Rust implementation uses a ProbeSeq and the later C++ also has a probe seq; could consider something similar
		// m.getExtraGroups++ // stats
		probeCount++
		group = (group + probeCount) & t.groupMask
		if debug && probeCount >= uint64(len(t.slots)/16) {
			panic(fmt.Sprintf("impossible: probeCount: %d groups: %d underlying table len: %d", probeCount, len(t.slots)/16, len(t.slots)))
		}
	}
}
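
// exampleProbeSequence is an illustrative sketch (hypothetical, not called by
// find or set) of the probe order used above: starting at a natural group, we
// advance by 1, 2, 3, ... (triangular increments), masked by groupMask. For a
// power-of-two group count this visits every group exactly once before repeating,
// which is why the probe loops above must terminate.
func exampleProbeSequence(naturalGroup, groupMask uint64) []uint64 {
	groups := make([]uint64, 0, groupMask+1)
	g := naturalGroup & groupMask
	for probeCount := uint64(0); probeCount <= groupMask; probeCount++ {
		groups = append(groups, g)
		g = (g + probeCount + 1) & groupMask
	}
	return groups
}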

// Set sets k and v within the map.
func (m *Map) Set(k Key, v Value) {
	// Write the element, incrementing the element count if needed and moving groups if needed.
	m.set(k, v, 1, true)
}

// set sets k and v within the map.
// elemIncr indicates if we should increment elemCount when populating
// a free slot. A zero elemIncr enables us to use set when evacuating,
// which does not change the number of elements.
// moveIfNeeded indicates if we should do move operations if currently growing.
func (m *Map) set(k Key, v Value, elemIncr int, moveIfNeeded bool) {
	h := m.hashFunc(k, m.seed)
	group := h & m.current.groupMask
	h2 := m.current.h2(h)

	if moveIfNeeded && m.old != nil {
		// We are growing. Move groups if needed.
		m.moveGroups(group, k, h)
	}

	var probeCount uint64
	// Do quadratic probing.
	// This loop will terminate for the same reasons as the find loop.
	for {
		bitmask, ok := MatchByte(h2, m.current.control[group*16:])
		if debug && !ok {
			panic("short control byte slice")
		}

		for bitmask != 0 {
			// We have at least one hit on h2
			offset := bits.TrailingZeros32(bitmask)
			pos := int(group*16) + offset
			kv := m.current.slots[pos]
			if kv.Key == k {
				// Update the existing key. Note we don't increment the elem count because we are replacing.
				m.current.control[pos] = h2
				m.current.slots[pos] = KV{Key: k, Value: v}
				// Track if we have any displaced elements in current while growing. This is rare.
				// TODO: This might not be a net perf win.
				if m.old != nil && probeCount != 0 {
					oldGroup := group & m.old.groupMask
					m.growStatus[oldGroup] = setCurHasDisplaced(m.growStatus[oldGroup])
				}
				return
			}

			// continue to look. infrequent with 7 bit h2.
			// clear the bit we just checked.
			bitmask &^= 1 << offset
		}

		// No matching h2, or we had a matching h2
		// but failed to find an equal key in the loop just above.
		// See if this is the end of our probe chain, which is indicated
		// by the presence of an EMPTY slot.
		emptyBitmask := matchEmpty(m.current.control[group*16:])
		if emptyBitmask != 0 {
			// We've reached the end of our probe chain without finding
			// a match on an existing key.
			if m.elemCount+m.current.deleteCount >= m.resizeThreshold && !m.disableResizing {
				// Double our size
				m.startResize()

				// Also set the key we are working on, then we are done.
				// (Simply re-using Set here causes a tiny bit of extra work when resizing;
				// we could instead let findFirstEmptyOrDeleted below handle it,
				// but we would need to at least recalc h2).
				// This is our first modification in our new table,
				// and we want to move the group(s) that correspond to this key.
				m.set(k, v, 1, true)
				return
			}

			var offset int
			if m.current.deleteCount == 0 || probeCount == 0 {
				// If we've never used a DELETED tombstone in this fixedTable,
				// the first group containing usable space is this group with its EMPTY slot,
				// which might be at the end of a probe chain, and we can use it now.
				// If instead we have DELETED somewhere but we have not just now probed beyond
				// the natural group, we can use an EMPTY slot in the natural group.
				// Either way, set the entry in this group using its first EMPTY slot.
				// TODO: double-check this is worthwhile given this
				// is an optimization that might not be in the C++ implementation?
				offset = bits.TrailingZeros32(emptyBitmask)
			} else {
				// We know there is room in the group we are on,
				// but we might have passed a usable DELETED slot during our
				// probing, so we rewind to this key's natural group and
				// probe forward from there,
				// and use the first EMPTY or DELETED slot found.
				group, offset = m.current.findFirstEmptyOrDeleted(h)
			}

			// Update the empty or deleted slot.
			pos := int(group*16) + offset
			if m.current.control[pos] == deletedSentinel {
				m.current.deleteCount--
			}
			m.current.control[pos] = h2
			m.current.slots[pos] = KV{Key: k, Value: v}
			m.elemCount += elemIncr
			// Track if we have any displaced elements in current while growing. This is rare.
			if m.old != nil && probeCount != 0 {
				oldGroup := group & m.old.groupMask
				m.growStatus[oldGroup] = setCurHasDisplaced(m.growStatus[oldGroup])
			}
			return
		}

		// We did not find an available slot.
		// We don't do quadratic probing within a group, but we do
		// quadratic probing across groups.
		// Continue our quadratic probing across groups, using triangular numbers.
		probeCount++
		group = (group + probeCount) & m.current.groupMask

		if debug && probeCount >= uint64(len(m.current.slots)/16) {
			panic(fmt.Sprintf("impossible: probeCount: %d groups: %d underlying table len: %d", probeCount, len(m.current.slots)/16, len(m.current.slots)))
		}
	}
}
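
// exampleNeedsResize is an illustrative sketch (hypothetical helper) of the
// resize trigger used in set above: live elements plus DELETED tombstones are
// compared against a threshold of 13/16 of the table size, matching the 6.5
// entries per 8-element bucket load factor used by New and calcTableSize.
func exampleNeedsResize(elemCount, deleteCount, tableSize int) bool {
	return elemCount+deleteCount >= (tableSize*13)/16
}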

// startResize begins an incremental resize: it creates a new fixedTable with
// doubled table size as current, moves the existing current to old, and sets up
// the growth tracking state. Elements are then evacuated gradually from old to
// current by later write operations (Set/Delete).
func (m *Map) startResize() {
	// prepare for a new, larger and initially empty current.
	m.resizeThreshold = m.resizeThreshold << 1
	newTableSize := len(m.current.control) << 1

	// place current in old, and create a new current
	m.old = &fixedTable{}
	*m.old = m.current
	m.current = *newFixedTable(newTableSize)

	// get ready to track our grow operation
	m.growStatus = make([]byte, len(m.old.control))
	m.sweepCursor = 0

	// TODO: temp stat for now
	m.resizeGenerations++
}
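
// exampleDoubledParams is an illustrative sketch (hypothetical helper) of what
// doubling the table size in startResize does to the derived fields computed by
// newFixedTable: with 16 control bytes per group, doubling the table doubles the
// group count, widens groupMask by one bit, and increases h2Shift by one, so h2
// is taken from the next-higher 7 bits of the hash in the new current table.
func exampleDoubledParams(oldTableSize int) (groupMask uint64, h2Shift uint8) {
	newTableSize := oldTableSize << 1
	groupMask = uint64(newTableSize)/16 - 1
	h2Shift = uint8(bits.TrailingZeros(uint(newTableSize / 16)))
	return groupMask, h2Shift
}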

// moveGroups takes a group in current along with a
// key that is triggering the move. It only expects to be called
// while growing. It moves up to three groups:
//  1. the natural group for this key
//  2. the group this key is located in, if it is displaced in old from its natural group
//  3. groups swept incrementally from the front of old, to ensure we eventually
//     finish and don't miss any groups
func (m *Map) moveGroups(group uint64, k Key, h uint64) {
	allowedMoves := 2

	// First, if the natural group for this key has not been moved, move it
	oldNatGroup := group & m.old.groupMask
	if !isEvacuated(m.growStatus[oldNatGroup]) {
		m.moveGroup(oldNatGroup)
		allowedMoves--
	}

	if !isChainEvacuated(m.growStatus[oldNatGroup]) {
		// Walk the chain that started at the natural group, moving any unmoved groups as we go.
		// If we move the complete chain, we mark the natural group as ChainEvacuated with moveChain.
		// The first group we'll visit is the one after the natural group (probeCount of 1).
		var chainEnd bool
		allowedMoves, chainEnd = m.moveChain(oldNatGroup, 1, allowedMoves)

		// We walked the chain as far as we could.
		if !chainEnd {
			// Rare case.
			// Our key might be displaced from its natural group in old,
			// and we did not complete the chain, so we might not have
			// reached the actual group with the key.
			// We rely elsewhere (such as in Get) upon always moving the actual group
			// containing the key when an existing key is Set/Deleted.
			// Find the key. Note that we don't need to recompute the hash.
			kv, oldDisplGroup, _ := m.find(m.old, k, h)
			if kv != nil && oldDisplGroup != oldNatGroup {
				if !isEvacuated(m.growStatus[oldDisplGroup]) {
					// Not moved yet, so move it.
					// TODO: non-fuzzing tests don't hit this. fuzzing hasn't reached this branch either (so far).
					m.moveGroup(oldDisplGroup)
					allowedMoves-- // Can reach -1 here. Rare, should be ok.
				}
			}
		}
	}

	stopCursor := uint64(len(m.old.control)) / 16
	if stopCursor > m.sweepCursor+1000 {
		stopCursor = m.sweepCursor + 1000
	}
	for m.sweepCursor < stopCursor {
		// Walk up to N groups looking for something to move and/or to mark ChainEvacuated.
		// The sweepCursor group is marked ChainEvacuated if we evac through the end of the chain.
		// The majority of the time, sweepCursor is a singleton chain or is otherwise the end of a chain.
		if !isChainEvacuated(m.growStatus[m.sweepCursor]) {
			allowedMoves, _ = m.moveChain(m.sweepCursor, 0, allowedMoves)
		}
		if isChainEvacuated(m.growStatus[m.sweepCursor]) {
			m.sweepCursor++
			continue
		}
		if allowedMoves <= 0 {
			break
		}
	}

	// Check if we are now done
	if m.sweepCursor >= (uint64(len(m.old.control)) / 16) {
		// Done growing!
		// TODO: we have some test coverage of this, but would be nice to have more explicit test
		// TODO: maybe extract a utility func
		m.old = nil
		m.growStatus = nil
		m.sweepCursor = 0
	}
}

// moveChain walks a probe chain that starts at a natural group, moving unmoved groups.
// The probeCount parameter allows it to begin in the middle of a walk.
// moveChain returns the number of remaining allowedMoves and a bool indicating
// if the end of the chain has been reached.
// Each moved group is marked as being evacuated, and if a chain is completely
// evacuated, the starting natural group is marked ChainEvacuated.
func (m *Map) moveChain(oldNatGroup uint64, probeCount uint64, allowedMoves int) (int, bool) {
	g := (oldNatGroup + probeCount) & m.old.groupMask

	for allowedMoves > 0 {
		if !isEvacuated(m.growStatus[g]) {
			// Evacuate.
			m.moveGroup(g)
			allowedMoves--
		}
		if matchEmpty(m.old.control[g*16:]) != 0 {
			// Done with the chain. Record that.
			m.growStatus[oldNatGroup] = setChainEvacuated(m.growStatus[oldNatGroup])
			// chainEnd is true
			return allowedMoves, true
		}
		probeCount++
		g = (g + probeCount) & m.old.groupMask
	}
	return allowedMoves, false
}

// moveGroup takes a group in old, and moves it to current.
// It only moves that group, and does not cascade to other groups
// (even if moving the group writes displaced elements to other groups).
func (m *Map) moveGroup(group uint64) {
	for offset, b := range m.old.control[group*16 : group*16+16] {
		if isStored(b) {
			// TODO: cleanup
			kv := m.old.slots[group*16+uint64(offset)]

			// We are re-using the set mechanism to write to
			// current, but we don't want cascading moves of other groups
			// based on this write, so moveIfNeeded is false.
			// TODO: m.set does a little more work than strictly required,
			// including we know the key is not present in current yet, so we could avoid MatchByte(h2) and
			// some other logic.
			m.set(kv.Key, kv.Value, 0, false)
		}
	}
	// Mark it evacuated.
	m.growStatus[group] = setEvacuated(m.growStatus[group])

	if matchEmpty(m.old.control[group*16:]) != 0 {
		// The probe chain starting at this group ends at this group,
		// so we can also mark it ChainEvacuated.
		m.growStatus[group] = setChainEvacuated(m.growStatus[group])
	}
}

// Delete deletes k from the map, if present.
func (m *Map) Delete(k Key) {
	// TODO: make a 'delete' with moveIfNeeded

	h := m.hashFunc(k, m.seed)
	group := h & m.current.groupMask
	if m.old != nil {
		// We are growing. Move groups if needed.
		// TODO: don't yet have a test that hits this (Delete while growing)
		m.moveGroups(group, k, h)
	}

	kv, group, offset := m.find(&m.current, k, h)
	if kv == nil {
		return
	}

	// Mark the existing key's position as DELETED or EMPTY.
	// In the common case we can set this position back to EMPTY.
	var sentinel byte = emptySentinel

	// However, we need to check if there are any EMPTY positions in this group
	emptyBitmask, ok := MatchByte(emptySentinel, m.current.control[group*16:])
	if debug && !ok {
		panic("short control byte slice")
	}
	if emptyBitmask == 0 {
		// We must use a DELETED tombstone because there are no remaining
		// positions marked EMPTY (which means there might have been displacement
		// past this group in the past by quadratic probing, and hence we use tombstones to make
		// sure we follow any displacement chain properly in any future operations).
		sentinel = deletedSentinel
		m.current.deleteCount++
	}

	pos := int(group*16) + offset
	m.current.control[pos] = sentinel
	// TODO: for a pointer, would want to set nil. could do with 'zero' generics func.
	m.current.slots[pos] = KV{}
	m.elemCount--
}

// matchEmptyOrDeleted checks if the first 16 bytes of controlBytes have
// any empty or deleted sentinels, returning a bitmask of the corresponding offsets.
// TODO: can optimize this via SSE (e.g., check high bit via _mm_movemask_epi8 or similar).
func matchEmptyOrDeleted(controlBytes []byte) uint32 {
	emptyBitmask, ok := MatchByte(emptySentinel, controlBytes)
	deletedBitmask, ok2 := MatchByte(deletedSentinel, controlBytes)
	if debug && !(ok && ok2) {
		panic("short control byte slice")
	}
	return emptyBitmask | deletedBitmask
}

// matchEmpty checks if the first 16 bytes of controlBytes have
// any empty sentinels, returning a bitmask of the corresponding offsets.
func matchEmpty(controlBytes []byte) uint32 {
	emptyBitmask, ok := MatchByte(emptySentinel, controlBytes)
	if debug && !ok {
		panic("short control byte slice")
	}
	return emptyBitmask
}
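
// exampleMatchHighBit is an illustrative, pure Go sketch (hypothetical, not used
// by matchEmptyOrDeleted) of the optimization suggested in the TODO on
// matchEmptyOrDeleted above: EMPTY and DELETED are the only control byte values
// with the high bit set, so a single high-bit scan of the 16 control bytes
// matches either sentinel without two separate MatchByte calls.
func exampleMatchHighBit(controlBytes []byte) uint32 {
	var bitmask uint32
	for i := 0; i < 16; i++ {
		if controlBytes[i]&0x80 != 0 {
			bitmask |= 1 << i
		}
	}
	return bitmask
}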

// Range calls f for each key and value present in the map.
// If f returns false, Range stops the iteration.
func (m *Map) Range(f func(key Key, value Value) bool) {
	// We iterate over snapshots of old and current tables, looking up
	// the golden data in the live tables as needed. It might be that the live
	// tables have a different value, or the live tables might have deleted the key,
	// both of which we must respect at the moment we emit a key/value during iteration.
	// However, we are not obligated to iterate over all the keys in the
	// live tables -- we are allowed to emit a key added after iteration start, but
	// are not required to do so.
	//
	// When iterating over our snapshot of old, we emit all keys encountered that are
	// still present in the live tables. We then iterate over our snapshot of current,
	// but skip any key present in the immutable old snapshot to avoid duplicates.
	//
	// In some cases, we can emit without a lookup, but in other cases we need to do a
	// lookup in another table. We have some logic to minimize rehashing. While iterating
	// over old, we typically need to rehash keys in evacuated groups, but while iterating
	// over current, the common case is we do not need to rehash even to do a lookup.
	//
	// A Set or Delete is allowed during an iteration (e.g., a Set within the user's code
	// invoked by Range might cause growth to start or finish), but not concurrently.
	// For example, iterating while concurrently calling Set from another goroutine
	// would be a user-level data race (similar to runtime maps).
	//
	// TODO: clean up comments and add better intro.
	// TODO: make an iter struct, with a calling sequence like iterstart and iternext

	// Begin by storing some snapshots of our tables.
	// For example, another m.old could appear later if a
	// new grow starts after this iterator starts.
	// We want to iterate over the old that we started with.
	// Note that old is immutable once we start growing.
	// TODO: maybe gather these, such as:
	// type iter struct { old, growStatus, current, oldPos, curPos, ... }
	old := m.old
	growStatus := m.growStatus

	// A new m.current can also be created mid iteration, so snapshot
	// it as well so that we can iterate over the current we started with.
	cur := m.current
	curControl := m.current.control[:] // TODO: maybe not needed, and/or collapse these?
	curSlots := m.current.slots[:]     // TODO: same

	// Below, we pick a random starting group and starting offset within that group.
	r := (uint64(fastrand()) << 32) | uint64(fastrand())
	if m.seed == 0 || m.seed == 42 {
		// TODO: currently forcing repeatability for some tests, including fuzzing, but eventually remove
		r = 0
	}

	// Now, iterate over our snapshot of old.
	if old != nil {
		for i, group := 0, r&old.groupMask; i < len(old.control)/16; i, group = i+1, (group+1)&old.groupMask {
			offsetMask := uint64(0x0F)
			for j, offset := 0, (r>>61)&offsetMask; j < 16; j, offset = j+1, (offset+1)&offsetMask {
				pos := group*16 + offset
				// Iterate over control bytes individually for now.
				// TODO: consider 64-bit check of control bytes or SSE operations (e.g., _mm_movemask_epi8).
				if isStored(old.control[pos]) {
					k := old.slots[pos].Key

					// We don't need to worry about displacements here when checking
					// evacuation status. (We are iterating over each control byte, wherever they have landed).
					if !isEvacuated(growStatus[pos/16]) {
						// Not evac. Because we always move both a key's natural group
						// and the key's displaced group for any Set or Delete, not evac means
						// we know nothing in this group has ever
						// been written or deleted in current, which means
						// the key/value here in old are the golden data,
						// which we use now. (If grow had completed, or if there
						// have been multiple generations of growing, our snapshot
						// of old will have everything evacuated).
						// TODO: current non-fuzzing tests don't hit this. fuzzing does ;-)
						cont := f(k, old.slots[pos].Value)
						if !cont {
							return
						}
						continue
					}

					// Now we handle the evacuated case. This key at one time was moved to current.
					// Check where the golden data resides now, and emit the live key/value if they still exist.
					// TODO: could probably do less work, including avoiding lookup/hashing in some cases

					if cur.groupMask == m.current.groupMask || m.old == nil {
						// We are still in the same grow as when the iter started,
						// or that grow is finished and we are not in the middle
						// of a different grow, so we don't need to look in m.old
						// (because this elem is already evacuated, or m.old doesn't exist),
						// and hence can just look in m.current.
						kv, _, _ := m.find(&m.current, k, m.hashFunc(k, m.seed))
						if kv != nil {
							cont := f(kv.Key, kv.Value)
							if !cont {
								return
							}
						}
						continue
					}

					// We are in the middle of a grow that is different from the grow at iter start.
					// In other words, m.old is now a "new" old.
					// Do a full Get, which looks in the live m.current or m.old as needed.
					v, ok := m.Get(k)
					if !ok {
						// Group was evacuated, but the key is not there now, so we don't emit anything.
						continue
					}
					// Key exists in live m.current, or possibly live m.old. Emit that copy.
					// TODO: for floats, handle -0 vs. +0 (https://go.dev/play/p/mCN_sddUlG9)
					cont := f(k, v)
					if !cont {
						return
					}
					continue
				}
			}
		}
	}

	// No old, or we've reached the end of old.
	// We now iterate over our snapshot of current, but we will skip anything present in
	// the immutable old because it would have already been processed above.
	loopMask := uint64(len(curControl)/16 - 1)
	for i, group := 0, r&loopMask; i < len(curControl)/16; i, group = i+1, (group+1)&loopMask {
		offsetMask := uint64(0x0F)
		for j, offset := 0, (r>>61)&offsetMask; j < 16; j, offset = j+1, (offset+1)&offsetMask {
			pos := group*16 + offset
			if isStored(curControl[pos]) {
				curGroup := uint64(pos / 16)
				k := curSlots[pos].Key

				if old != nil {
					// We are about to look in old, but first, compute the hash for this key (frequently cheaply).
					var h uint64
					if !curHasDisplaced(growStatus[curGroup&old.groupMask]) {
						// During a grow, we track when a group contains a displaced element.
						// The group we are on does not have any displaced elements, which means
						// we can reconstruct the useful portion of the hash from the group and h2.
						// This could help with cases like https://go.dev/issue/51410 when a map
						// is in a growing state for an extended period.
						// TODO: check cost and if worthwhile
						h = cur.reconstructHash(curControl[pos], curGroup)
					} else {
						// Rare that a group in current would have displaced elems during a grow,
						// but it means we must recompute the hash from scratch.
						h = m.hashFunc(k, m.seed)
					}

					// Look in old
					kv, _, _ := m.find(old, k, h)
					if kv != nil {
						// This key exists in the immutable old, so it was already handled above in our loop over old.
						continue
					}
				}

				// The key was not in old or there is no old. If the key is still live, we will emit it.
				// Start by checking if m.current is the same as the snapshot of current we are iterating over.
				if cur.groupMask == m.current.groupMask {
					// They are the same, so we can simply emit from the snapshot.
					cont := f(k, curSlots[pos].Value)
					if !cont {
						return
					}
					continue
				}

				// Additional grows have happened since we started, so we need to check m.current and
				// possibly a new m.old if needed, which is all handled by Get.
				// TODO: could pass in the reconstructed hash here as well, though this is a rarer case compared to
				// writes stopping and a map being "stuck" in the same growing state forever or for a long time.
				v, ok := m.Get(k)
				if !ok {
					// key not there now, so we don't emit anything
					continue
				}
				// Key exists in live current, or possibly live old. Emit.
				// TODO: for floats, handle -0 vs. +0
				cont := f(k, v)
				if !cont {
					return
				}
				continue
			}
		}
	}
}
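
// exampleRangeWithMutation is an illustrative sketch (hypothetical) mirroring the
// comments at the top of Range: a Set or Delete from inside the Range callback is
// allowed (and may start or finish a grow), whereas calling Set or Delete
// concurrently from another goroutine during a Range is not.
func exampleRangeWithMutation(m *Map) {
	m.Range(func(k Key, v Value) bool {
		if v < 0 {
			m.Delete(k) // allowed: same goroutine, during the iteration
		}
		return true // keep iterating
	})
}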

// isStored reports whether controlByte indicates a stored value.
// If the leading bit is 0, it means there is a valid value in the corresponding
// slot in the table. (The next 7 bits are the h2 value).
// TODO: maybe isStored -> hasStored or similar?
func isStored(controlByte byte) bool {
	return controlByte&(1<<7) == 0
}

// isEvacuated reports whether the group corresponding to statusByte
// has been moved from old to new.
// Note: this is just for the elements stored in that group in old,
// and does not mean all elements displaced from that group have been evacuated.
// TODO: collapse these flags down into fewer bits rather than using a full byte
// TODO: maybe make a type
func isEvacuated(statusByte byte) bool {
	return statusByte&(1<<0) != 0
}

func setEvacuated(statusByte byte) byte {
	return statusByte | (1 << 0)
}

// isChainEvacuated is similar to isEvacuated, but reports whether the group
// corresponding to statusByte has been moved from old to new
// along with any probe chains that originate from that group.
// A group that does not have any chains originating from it can have isChainEvacuated true.
func isChainEvacuated(statusByte byte) bool {
	return statusByte&(1<<1) != 0
}

func setChainEvacuated(statusByte byte) byte {
	return statusByte | (1 << 1)
}

// curHasDisplaced indicates the group in current has displaced elements.
// It is only tracked during grow operations, and therefore is
// only very rarely set. If we are mid-grow, it means current was recently
// doubled in size and has not yet had enough elems added to complete the grow.
// TODO: verify this is a performance win for range
// TODO: consider oldHasDisplaced, but might be less of a win
// (additional bookkeeping, likely higher mispredictions than curHasDisplaced, ...).
func curHasDisplaced(statusByte byte) bool {
	return statusByte&(1<<2) != 0
}

func setCurHasDisplaced(statusByte byte) byte {
	return statusByte | (1 << 2)
}
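
// exampleGrowStatus is an illustrative sketch (hypothetical helper) showing how
// the growth status bits manipulated by the helpers above are laid out within a
// single growStatus byte: bit 0 is Evacuated, bit 1 is ChainEvacuated, and bit 2
// is CurHasDisplaced. A fully processed group during a grow typically ends up
// with the low two bits set.
func exampleGrowStatus() byte {
	var s byte
	s = setEvacuated(s)      // bit 0: this group's stored entries moved to current
	s = setChainEvacuated(s) // bit 1: the probe chain starting here fully moved
	// setCurHasDisplaced (bit 2) is only set for the rare displaced-in-current case.
	return s // 0b0000_0011
}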

// Len returns the number of elements stored in the Map.
// (We track this count explicitly in elemCount).
func (m *Map) Len() int {
	return m.elemCount
}

// newFixedTable returns a *fixedTable that is ready to use.
// A fixedTable can be copied.
func newFixedTable(tableSize int) *fixedTable {
	// TODO: not using capacity in our make calls. Probably reasonable for straight swisstable impl?

	if tableSize&(tableSize-1) != 0 || tableSize == 0 {
		panic(fmt.Sprintf("table size %d is not power of 2", tableSize))
	}

	slots := make([]KV, tableSize)
	control := make([]byte, tableSize)
	// Initialize all control bytes to empty.
	// TODO: consider using 0x00 for empty, or unroll, or set these with unsafe, or...
	// A simple loop here is ~15% of the time to construct a large capacity empty table.
	for i := range control {
		control[i] = emptySentinel
	}

	return &fixedTable{
		control: control,
		slots:   slots,
		// 16 control bytes per group, table length is a power of 2
		groupMask: (uint64(tableSize) / 16) - 1,
		// h2Shift gives h2 as the next 7 bits just above the group mask.
		// (It is not the top 7 bits, which is what the runtime map uses).
		// TODO: small sanity check of h2Shift; maybe make a test: https://go.dev/play/p/DjmN7O4YrWI
		h2Shift: uint8(bits.TrailingZeros(uint(tableSize / 16))),
	}
}

func (t *fixedTable) findFirstEmptyOrDeleted(h uint64) (group uint64, offset int) {
	group = h & t.groupMask

	// Do quadratic probing.
	var probeCount uint64
	for {
		bitmask := matchEmptyOrDeleted(t.control[group*16:])
		if bitmask != 0 {
			// We have at least one hit
			offset = bits.TrailingZeros32(bitmask)
			return group, offset
		}

		// No matching empty or deleted control byte.
		// Keep probing to the next group. (It's a bug if the whole table
		// does not contain any empty or deleted positions).
		probeCount++
		group = (group + probeCount) & t.groupMask
		if debug && probeCount >= uint64(len(t.slots)/16) {
			panic(fmt.Sprintf("impossible: probeCount: %d groups: %d underlying table len: %d", probeCount, len(t.slots)/16, len(t.slots)))
		}
	}
}

// h2 returns the 7 bits immediately above the bits covered by the table's groupMask.
func (t *fixedTable) h2(h uint64) uint8 {
	// TODO: does an extra mask here elim a shift check in the generated code?
	return uint8((h >> uint64(t.h2Shift)) & 0x7f)
}
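
// exampleHashBitsRoundTrip is an illustrative sketch (hypothetical helper) of the
// identity that reconstructHash below relies on: for a stored key, the group index
// is the hash masked by groupMask and the control byte holds the next 7 bits, so
// combining them recovers exactly the hash bits that find and h2 consume.
func exampleHashBitsRoundTrip(t *fixedTable, h uint64) bool {
	group := h & t.groupMask
	ctrl := t.h2(h) // the control byte that would be stored for this hash (high bit 0)
	recovered := t.reconstructHash(ctrl, group)
	usedBits := t.groupMask | (uint64(0x7F) << uint64(t.h2Shift))
	return recovered == h&usedBits
}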

// reconstructHash reconstructs the bits of the original hash covered by
// the table's groupMask plus an additional 7 bits. In other words, it reconstructs
// the bits that we use elsewhere for h2 and the group (h1). It assumes
// controlByte contains the h2 (that is, that it corresponds to a stored position).
// TODO: runtime map might be able to use this approach?
func (t *fixedTable) reconstructHash(controlByte byte, group uint64) uint64 {
	return group | ((uint64(controlByte) & 0x7F) << uint64(t.h2Shift))
}

// calcTableSize returns the length to use
// for the storage slices to support
// capacityHint stored map elements.
func calcTableSize(capacityHint int) int {
	// For now, follow Go maps with a max of 6.5 entries per 8-elem bucket,
	// which is an 81.25% max load factor, rounded up to a power of 2.
	// Our current minimum size is 16.
	tableSize := int(float64(capacityHint) / (6.5 / 8))
	pow2 := 16
	// TODO: clip max
	for tableSize > pow2 {
		pow2 = pow2 << 1
	}
	tableSize = pow2

	// sanity check power of 2
	if tableSize&(tableSize-1) != 0 || tableSize == 0 {
		panic("impossible")
	}
	return tableSize
}

func zeroKey() Key {
	return Key(0)
}

func zeroValue() Value {
	return Value(0)
}

func hashUint64(k Key, seed uintptr) uint64 {
	// earlier: uint64(memhash(unsafe.Pointer(&k), seed, uintptr(8)))
	return uint64(memhash64(unsafe.Pointer(&k), seed))
}

func hashString(s string, seed uintptr) uint64 {
	return uint64(strhash(unsafe.Pointer(&s), seed))
}

//go:linkname memhash runtime.memhash
//go:noescape
func memhash(p unsafe.Pointer, seed, s uintptr) uintptr

//go:linkname memhash64 runtime.memhash64
//go:noescape
func memhash64(p unsafe.Pointer, seed uintptr) uintptr

//go:linkname strhash runtime.strhash
//go:noescape
func strhash(p unsafe.Pointer, h uintptr) uintptr

// TODO: fastrand64 did not initially work
//go:linkname fastrand runtime.fastrand
func fastrand() uint32

func init() {
	if runtime.GOARCH != "amd64" {
		// The assembly is amd64-only without a pure Go fallback yet.
		// Also, we are ignoring 32-bit in several places.
		panic("only amd64 is supported")
	}
}

const debug = false
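
// matchByteFallback is an illustrative, pure Go sketch of a portable fallback for
// the amd64-only MatchByte assembly used throughout this file (the name and the
// idea of a fallback are hypothetical; see the init check above). It reports which
// of the first 16 control bytes equal b, as a bitmask of offsets, plus whether the
// slice was long enough. A real fallback would more likely use SWAR on two uint64
// words rather than a byte-at-a-time loop.
func matchByteFallback(b byte, controlBytes []byte) (uint32, bool) {
	if len(controlBytes) < 16 {
		return 0, false
	}
	var bitmask uint32
	for i := 0; i < 16; i++ {
		if controlBytes[i] == b {
			bitmask |= 1 << i
		}
	}
	return bitmask, true
}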