github.com/mhmtszr/concurrent-swiss-map@v1.0.8/swiss/map.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package swiss
    16  
    17  import (
    18  	"github.com/mhmtszr/concurrent-swiss-map/maphash"
    19  )
    20  
const (
	// maxLoadFactor is the ratio of maxAvgGroupLoad to groupSize. It is the
	// same ratio as a Map's |limit| (groups * maxAvgGroupLoad) to its total
	// slot count (groups * groupSize).
	maxLoadFactor = float32(maxAvgGroupLoad) / float32(groupSize)
)
    24  
// Map is an open-addressing hash map
// based on Abseil's flat_hash_map.
type Map[K comparable, V any] struct {
	// ctrl holds one metadata array per group; ctrl[g][s] is the control
	// byte describing slot |s| of groups[g].
	ctrl []metadata
	// groups holds the key/value storage, indexed in parallel with ctrl.
	groups []group[K, V]
	// hash is the hasher used by Put; rehash replaces it with a new seed.
	hash maphash.Hasher[K]
	// resident counts slots whose control byte is not |empty|. It is
	// incremented on insert and decremented only when a delete writes
	// |empty|, so it includes tombstoned slots.
	resident uint32
	// dead counts slots that currently hold a tombstone.
	dead uint32
	// limit is the value of resident at which Put/PutWithHash rehash
	// before inserting.
	limit uint32
}
    35  
// metadata is the h2 metadata array for a group.
// find operations first probe the control bytes
// to filter candidates before matching keys.
// Each entry is either an h2 value, |empty| or |tombstone|.
type metadata [groupSize]int8
    40  
// group is a group of groupSize key-value pairs;
// slot |s| of a group pairs keys[s] with values[s].
type group[K comparable, V any] struct {
	keys   [groupSize]K
	values [groupSize]V
}
    46  
const (
	// h1Mask selects the upper 57 bits of a hash (see splitHash).
	h1Mask uint64 = 0xffff_ffff_ffff_ff80
	// h2Mask selects the lower 7 bits of a hash (see splitHash).
	h2Mask uint64 = 0x0000_0000_0000_007f
	// empty marks a slot that has never held a key, or whose key was
	// physically removed. A masked h2 value is in [0, 127], so it can
	// never equal this negative sentinel.
	empty int8 = -128 // 0b1000_0000
	// tombstone marks a slot whose key was deleted while its group had
	// no empty slot; probes continue past it.
	tombstone int8 = -2 // 0b1111_1110
)
    53  
// h1 is a 57 bit hash prefix; probeStart uses it to choose the
// first group to probe.
type h1 uint64

// h2 is a 7 bit hash suffix; it is the value stored in a slot's
// control byte and matched by metaMatchH2.
type h2 int8
    59  
    60  // NewMap constructs a Map.
    61  func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) {
    62  	groups := numGroups(sz)
    63  	m = &Map[K, V]{
    64  		ctrl:   make([]metadata, groups),
    65  		groups: make([]group[K, V], groups),
    66  		hash:   maphash.NewHasher[K](),
    67  		limit:  groups * maxAvgGroupLoad,
    68  	}
    69  	for i := range m.ctrl {
    70  		m.ctrl[i] = newEmptyMetadata()
    71  	}
    72  	return
    73  }
    74  
    75  func (m *Map[K, V]) HasWithHash(key K, hash uint64) (ok bool) {
    76  	hi, lo := splitHash(hash)
    77  	g := probeStart(hi, len(m.groups))
    78  	for { // inlined find loop
    79  		matches := metaMatchH2(&m.ctrl[g], lo)
    80  		for matches != 0 {
    81  			s := nextMatch(&matches)
    82  			if key == m.groups[g].keys[s] {
    83  				ok = true
    84  				return
    85  			}
    86  		}
    87  		// |key| is not in group |g|,
    88  		// stop probing if we see an empty slot
    89  		matches = metaMatchEmpty(&m.ctrl[g])
    90  		if matches != 0 {
    91  			ok = false
    92  			return
    93  		}
    94  		g++ // linear probing
    95  		if g >= uint32(len(m.groups)) {
    96  			g = 0
    97  		}
    98  	}
    99  }
   100  
   101  func (m *Map[K, V]) GetWithHash(key K, hash uint64) (value V, ok bool) {
   102  	hi, lo := splitHash(hash)
   103  	g := probeStart(hi, len(m.groups))
   104  	for { // inlined find loop
   105  		matches := metaMatchH2(&m.ctrl[g], lo)
   106  		for matches != 0 {
   107  			s := nextMatch(&matches)
   108  			if key == m.groups[g].keys[s] {
   109  				value, ok = m.groups[g].values[s], true
   110  				return
   111  			}
   112  		}
   113  		// |key| is not in group |g|,
   114  		// stop probing if we see an empty slot
   115  		matches = metaMatchEmpty(&m.ctrl[g])
   116  		if matches != 0 {
   117  			ok = false
   118  			return
   119  		}
   120  		g++ // linear probing
   121  		if g >= uint32(len(m.groups)) {
   122  			g = 0
   123  		}
   124  	}
   125  }
   126  
   127  // Put attempts to insert |key| and |value|
   128  func (m *Map[K, V]) Put(key K, value V) {
   129  	if m.resident >= m.limit {
   130  		m.rehash(m.nextSize())
   131  	}
   132  	hi, lo := splitHash(m.hash.Hash(key))
   133  	g := probeStart(hi, len(m.groups))
   134  	for { // inlined find loop
   135  		matches := metaMatchH2(&m.ctrl[g], lo)
   136  		for matches != 0 {
   137  			s := nextMatch(&matches)
   138  			if key == m.groups[g].keys[s] { // update
   139  				m.groups[g].keys[s] = key
   140  				m.groups[g].values[s] = value
   141  				return
   142  			}
   143  		}
   144  		// |key| is not in group |g|,
   145  		// stop probing if we see an empty slot
   146  		matches = metaMatchEmpty(&m.ctrl[g])
   147  		if matches != 0 { // insert
   148  			s := nextMatch(&matches)
   149  			m.groups[g].keys[s] = key
   150  			m.groups[g].values[s] = value
   151  			m.ctrl[g][s] = int8(lo)
   152  			m.resident++
   153  			return
   154  		}
   155  		g++ // linear probing
   156  		if g >= uint32(len(m.groups)) {
   157  			g = 0
   158  		}
   159  	}
   160  }
   161  
   162  // Put attempts to insert |key| and |value|
   163  func (m *Map[K, V]) PutWithHash(key K, value V, hash uint64) {
   164  	if m.resident >= m.limit {
   165  		m.rehash(m.nextSize())
   166  	}
   167  	hi, lo := splitHash(hash)
   168  	g := probeStart(hi, len(m.groups))
   169  	for { // inlined find loop
   170  		matches := metaMatchH2(&m.ctrl[g], lo)
   171  		for matches != 0 {
   172  			s := nextMatch(&matches)
   173  			if key == m.groups[g].keys[s] { // update
   174  				m.groups[g].keys[s] = key
   175  				m.groups[g].values[s] = value
   176  				return
   177  			}
   178  		}
   179  		// |key| is not in group |g|,
   180  		// stop probing if we see an empty slot
   181  		matches = metaMatchEmpty(&m.ctrl[g])
   182  		if matches != 0 { // insert
   183  			s := nextMatch(&matches)
   184  			m.groups[g].keys[s] = key
   185  			m.groups[g].values[s] = value
   186  			m.ctrl[g][s] = int8(lo)
   187  			m.resident++
   188  			return
   189  		}
   190  		g++ // linear probing
   191  		if g >= uint32(len(m.groups)) {
   192  			g = 0
   193  		}
   194  	}
   195  }
   196  
   197  func (m *Map[K, V]) DeleteWithHash(key K, hash uint64) (ok bool) {
   198  	hi, lo := splitHash(hash)
   199  	g := probeStart(hi, len(m.groups))
   200  	for {
   201  		matches := metaMatchH2(&m.ctrl[g], lo)
   202  		for matches != 0 {
   203  			s := nextMatch(&matches)
   204  			if key == m.groups[g].keys[s] {
   205  				ok = true
   206  				// optimization: if |m.ctrl[g]| contains any empty
   207  				// metadata bytes, we can physically delete |key|
   208  				// rather than placing a tombstone.
   209  				// The observation is that any probes into group |g|
   210  				// would already be terminated by the existing empty
   211  				// slot, and therefore reclaiming slot |s| will not
   212  				// cause premature termination of probes into |g|.
   213  				if metaMatchEmpty(&m.ctrl[g]) != 0 {
   214  					m.ctrl[g][s] = empty
   215  					m.resident--
   216  				} else {
   217  					m.ctrl[g][s] = tombstone
   218  					m.dead++
   219  				}
   220  				var k K
   221  				var v V
   222  				m.groups[g].keys[s] = k
   223  				m.groups[g].values[s] = v
   224  				return
   225  			}
   226  		}
   227  		// |key| is not in group |g|,
   228  		// stop probing if we see an empty slot
   229  		matches = metaMatchEmpty(&m.ctrl[g])
   230  		if matches != 0 { // |key| absent
   231  			ok = false
   232  			return
   233  		}
   234  		g++ // linear probing
   235  		if g >= uint32(len(m.groups)) {
   236  			g = 0
   237  		}
   238  	}
   239  }
   240  
   241  // Clear removes all elements from the Map.
   242  func (m *Map[K, V]) Clear() {
   243  	for i, c := range m.ctrl {
   244  		for j := range c {
   245  			m.ctrl[i][j] = empty
   246  		}
   247  	}
   248  	var k K
   249  	var v V
   250  	for i := range m.groups {
   251  		g := &m.groups[i]
   252  		for i := range g.keys {
   253  			g.keys[i] = k
   254  			g.values[i] = v
   255  		}
   256  	}
   257  	m.resident, m.dead = 0, 0
   258  }
   259  
   260  // Iter iterates the elements of the Map, passing them to the callback.
   261  // It guarantees that any key in the Map will be visited only once, and
   262  // for un-mutated Maps, every key will be visited once. If the Map is
   263  // Mutated during iteration, mutations will be reflected on return from
   264  // Iter, but the set of keys visited by Iter is non-deterministic.
   265  //
   266  //nolint:gosec
   267  func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) bool {
   268  	// take a consistent view of the table in case
   269  	// we rehash during iteration
   270  	ctrl, groups := m.ctrl, m.groups
   271  	// pick a random starting group
   272  	g := randIntN(len(groups))
   273  	for n := 0; n < len(groups); n++ {
   274  		for s, c := range ctrl[g] {
   275  			if c == empty || c == tombstone {
   276  				continue
   277  			}
   278  			k, v := groups[g].keys[s], groups[g].values[s]
   279  			if stop := cb(k, v); stop {
   280  				return stop
   281  			}
   282  		}
   283  		g++
   284  		if g >= uint32(len(groups)) {
   285  			g = 0
   286  		}
   287  	}
   288  	return false
   289  }
   290  
   291  // Count returns the number of elements in the Map.
   292  func (m *Map[K, V]) Count() int {
   293  	return int(m.resident - m.dead)
   294  }
   295  
   296  func (m *Map[K, V]) nextSize() (n uint32) {
   297  	n = uint32(len(m.groups)) * 2
   298  	if m.dead >= (m.resident / 2) {
   299  		n = uint32(len(m.groups))
   300  	}
   301  	return
   302  }
   303  
   304  func (m *Map[K, V]) rehash(n uint32) {
   305  	groups, ctrl := m.groups, m.ctrl
   306  	m.groups = make([]group[K, V], n)
   307  	m.ctrl = make([]metadata, n)
   308  	for i := range m.ctrl {
   309  		m.ctrl[i] = newEmptyMetadata()
   310  	}
   311  	m.hash = maphash.NewSeed(m.hash)
   312  	m.limit = n * maxAvgGroupLoad
   313  	m.resident, m.dead = 0, 0
   314  	for g := range ctrl {
   315  		for s := range ctrl[g] {
   316  			c := ctrl[g][s]
   317  			if c == empty || c == tombstone {
   318  				continue
   319  			}
   320  			m.Put(groups[g].keys[s], groups[g].values[s])
   321  		}
   322  	}
   323  }
   324  
   325  // numGroups returns the minimum number of groups needed to store |n| elems.
   326  func numGroups(n uint32) (groups uint32) {
   327  	groups = (n + maxAvgGroupLoad - 1) / maxAvgGroupLoad
   328  	if groups == 0 {
   329  		groups = 1
   330  	}
   331  	return
   332  }
   333  
   334  func newEmptyMetadata() (meta metadata) {
   335  	for i := range meta {
   336  		meta[i] = empty
   337  	}
   338  	return
   339  }
   340  
   341  func splitHash(h uint64) (h1, h2) {
   342  	return h1((h & h1Mask) >> 7), h2(h & h2Mask)
   343  }
   344  
   345  func probeStart(hi h1, groups int) uint32 {
   346  	return fastModN(uint32(hi), uint32(groups))
   347  }
   348  
   349  // lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
   350  func fastModN(x, n uint32) uint32 {
   351  	return uint32((uint64(x) * uint64(n)) >> 32)
   352  }
   353  
   354  // randIntN returns a random number in the interval [0, n).
   355  func randIntN(n int) uint32 {
   356  	return fastModN(fastrand(), uint32(n))
   357  }