github.com/saintwish/kv@v1.0.4/swiss/map.go (about)

     1  // From https://github.com/dolthub/swiss
     2  // Copyright 2023 Dolthub, Inc.
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package swiss
    17  
    18  import (
    19  	"github.com/dolthub/maphash"
    20  )
    21  
const (
	// maxLoadFactor is the ratio of maxAvgGroupLoad to groupSize,
	// both of which are declared elsewhere in this package.
	maxLoadFactor = float32(maxAvgGroupLoad) / float32(groupSize)
)
    25  
// Map is an open-addressing hash map
// based on Abseil's flat_hash_map.
//
// The methods index |ctrl| and |groups| without checking for an empty
// table, so a Map is expected to be built with NewMap.
type Map[K comparable, V any] struct {
	ctrl     []metadata        // control bytes, one metadata array per group; ctrl[i] describes groups[i]
	groups   []group[K, V]     // key/value slots, same length as ctrl
	hash     maphash.Hasher[K] // hashes keys; replaced via maphash.NewSeed on every rehash
	resident uint32            // slots that are not empty, i.e. live entries plus tombstones
	dead     uint32            // slots currently marked with a tombstone
	limit    uint32            // Set rehashes once resident reaches this value
}
    36  
// metadata is the h2 metadata array for a group.
// find operations first probe the control bytes
// to filter candidates before matching keys.
// Each byte is either the |empty| or |tombstone| sentinel,
// or the h2 value written by Set for an occupied slot.
type metadata [groupSize]int8
    41  
// group is a group of groupSize key-value pairs
// (groupSize is declared elsewhere in this package).
// keys[s] and values[s] together form slot |s| of the group.
type group[K comparable, V any] struct {
	keys   [groupSize]K
	values [groupSize]V
}
    47  
const (
	// h1Mask selects the upper 57 bits of a 64-bit hash.
	h1Mask    uint64 = 0xffff_ffff_ffff_ff80
	// h2Mask selects the lower 7 bits of a 64-bit hash.
	h2Mask    uint64 = 0x0000_0000_0000_007f
	// empty marks a slot that holds no entry; lookups stop probing
	// at a group that contains at least one empty slot.
	empty     int8   = -128 // 0b1000_0000
	// tombstone marks a slot whose entry was deleted from a group
	// that had no empty slot at the time of deletion.
	tombstone int8   = -2   // 0b1111_1110
)
    54  
// h1 is a 57 bit hash prefix.
// It is produced by splitHash and selects the group where probing starts.
type h1 uint64
    57  
// h2 is a 7 bit hash suffix.
// It is produced by splitHash and stored in the control byte
// of the slot that holds the key.
type h2 int8
    60  
    61  // NewMap constructs a Map.
    62  func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) {
    63  	groups := numGroups(sz)
    64  	m = &Map[K, V]{
    65  		ctrl:   make([]metadata, groups),
    66  		groups: make([]group[K, V], groups),
    67  		hash:   maphash.NewHasher[K](),
    68  		limit:  groups * maxAvgGroupLoad,
    69  	}
    70  	for i := range m.ctrl {
    71  		m.ctrl[i] = newEmptyMetadata()
    72  	}
    73  	return
    74  }
    75  
    76  // Has returns true if |key| is present in |m|.
    77  func (m *Map[K, V]) Has(key K) (ok bool) {
    78  	hi, lo := splitHash(m.hash.Hash(key))
    79  	g := probeStart(hi, len(m.groups))
    80  	for { // inlined find loop
    81  		matches := metaMatchH2(&m.ctrl[g], lo)
    82  		for matches != 0 {
    83  			s := nextMatch(&matches)
    84  			if key == m.groups[g].keys[s] {
    85  				ok = true
    86  				return
    87  			}
    88  		}
    89  		// |key| is not in group |g|,
    90  		// stop probing if we see an empty slot
    91  		matches = metaMatchEmpty(&m.ctrl[g])
    92  		if matches != 0 {
    93  			ok = false
    94  			return
    95  		}
    96  		g += 1 // linear probing
    97  		if g >= uint32(len(m.groups)) {
    98  			g = 0
    99  		}
   100  	}
   101  }
   102  
   103  // Get returns the |value|.
   104  func (m *Map[K, V]) Get(key K) (value V) {
   105  	hi, lo := splitHash(m.hash.Hash(key))
   106  	g := probeStart(hi, len(m.groups))
   107  	for { // inlined find loop
   108  		matches := metaMatchH2(&m.ctrl[g], lo)
   109  		for matches != 0 {
   110  			s := nextMatch(&matches)
   111  			if key == m.groups[g].keys[s] {
   112  				value, _ = m.groups[g].values[s], true
   113  				return
   114  			}
   115  		}
   116  		// |key| is not in group |g|,
   117  		// stop probing if we see an empty slot
   118  		matches = metaMatchEmpty(&m.ctrl[g])
   119  		if matches != 0 {
   120  			return
   121  		}
   122  		g += 1 // linear probing
   123  		if g >= uint32(len(m.groups)) {
   124  			g = 0
   125  		}
   126  	}
   127  }
   128  
   129  // GetHas returns the |value| and |ok| mapped by |key|.
   130  func (m *Map[K, V]) GetHas(key K) (ok bool, value V) {
   131  	hi, lo := splitHash(m.hash.Hash(key))
   132  	g := probeStart(hi, len(m.groups))
   133  	for { // inlined find loop
   134  		matches := metaMatchH2(&m.ctrl[g], lo)
   135  		for matches != 0 {
   136  			s := nextMatch(&matches)
   137  			if key == m.groups[g].keys[s] {
   138  				value, ok = m.groups[g].values[s], true
   139  				return
   140  			}
   141  		}
   142  		// |key| is not in group |g|,
   143  		// stop probing if we see an empty slot
   144  		matches = metaMatchEmpty(&m.ctrl[g])
   145  		if matches != 0 {
   146  			ok = false
   147  			return
   148  		}
   149  		g += 1 // linear probing
   150  		if g >= uint32(len(m.groups)) {
   151  			g = 0
   152  		}
   153  	}
   154  }
   155  
   156  // Put attempts to insert |key| and |value|
   157  func (m *Map[K, V]) Set(key K, value V) {
   158  	if m.resident >= m.limit {
   159  		m.rehash(m.nextSize())
   160  	}
   161  	hi, lo := splitHash(m.hash.Hash(key))
   162  	g := probeStart(hi, len(m.groups))
   163  	for { // inlined find loop
   164  		matches := metaMatchH2(&m.ctrl[g], lo)
   165  		for matches != 0 {
   166  			s := nextMatch(&matches)
   167  			if key == m.groups[g].keys[s] { // update
   168  				m.groups[g].keys[s] = key
   169  				m.groups[g].values[s] = value
   170  				return
   171  			}
   172  		}
   173  		// |key| is not in group |g|,
   174  		// stop probing if we see an empty slot
   175  		matches = metaMatchEmpty(&m.ctrl[g])
   176  		if matches != 0 { // insert
   177  			s := nextMatch(&matches)
   178  			m.groups[g].keys[s] = key
   179  			m.groups[g].values[s] = value
   180  			m.ctrl[g][s] = int8(lo)
   181  			m.resident++
   182  			return
   183  		}
   184  		g += 1 // linear probing
   185  		if g >= uint32(len(m.groups)) {
   186  			g = 0
   187  		}
   188  	}
   189  }
   190  
   191  // Delete attempts to remove |key|, returns true if successful and the item.
   192  func (m *Map[K, V]) Delete(key K) (ok bool, value V) {
   193  	hi, lo := splitHash(m.hash.Hash(key))
   194  	g := probeStart(hi, len(m.groups))
   195  	for {
   196  		matches := metaMatchH2(&m.ctrl[g], lo)
   197  		for matches != 0 {
   198  			s := nextMatch(&matches)
   199  			if key == m.groups[g].keys[s] {
   200  				ok, value = true, m.groups[g].values[s]
   201  				// optimization: if |m.ctrl[g]| contains any empty
   202  				// metadata bytes, we can physically delete |key|
   203  				// rather than placing a tombstone.
   204  				// The observation is that any probes into group |g|
   205  				// would already be terminated by the existing empty
   206  				// slot, and therefore reclaiming slot |s| will not
   207  				// cause premature termination of probes into |g|.
   208  				if metaMatchEmpty(&m.ctrl[g]) != 0 {
   209  					m.ctrl[g][s] = empty
   210  					m.resident--
   211  				} else {
   212  					m.ctrl[g][s] = tombstone
   213  					m.dead++
   214  				}
   215  				var k K
   216  				var v V
   217  				m.groups[g].keys[s] = k
   218  				m.groups[g].values[s] = v
   219  				return
   220  			}
   221  		}
   222  		// |key| is not in group |g|,
   223  		// stop probing if we see an empty slot
   224  		matches = metaMatchEmpty(&m.ctrl[g])
   225  		if matches != 0 { // |key| absent
   226  			var v V
   227  			ok, value = false, v
   228  			return
   229  		}
   230  		g += 1 // linear probing
   231  		if g >= uint32(len(m.groups)) {
   232  			g = 0
   233  		}
   234  	}
   235  }
   236  
   237  // Iter iterates the elements of the Map, passing them to the callback.
   238  // It guarantees that any key in the Map will be visited only once, and
   239  // for un-mutated Maps, every key will be visited once. If the Map is
   240  // Mutated during iteration, mutations will be reflected on return from
   241  // Iter, but the set of keys visited by Iter is non-deterministic.
   242  func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) {
   243  	// take a consistent view of the table in case
   244  	// we rehash during iteration
   245  	ctrl, groups := m.ctrl, m.groups
   246  	// pick a random starting group
   247  	g := randIntN(len(groups))
   248  	for n := 0; n < len(groups); n++ {
   249  		for s, c := range ctrl[g] {
   250  			if c == empty || c == tombstone {
   251  				continue
   252  			}
   253  			k, v := groups[g].keys[s], groups[g].values[s]
   254  			if stop := cb(k, v); stop {
   255  				return
   256  			}
   257  		}
   258  		g++
   259  		if g >= uint32(len(groups)) {
   260  			g = 0
   261  		}
   262  	}
   263  }
   264  
   265  // Clear removes all elements from the Map.
   266  func (m *Map[K, V]) Clear() {
   267  	for i, c := range m.ctrl {
   268  		for j := range c {
   269  			m.ctrl[i][j] = empty
   270  		}
   271  	}
   272  	var k K
   273  	var v V
   274  	for i := range m.groups {
   275  		g := &m.groups[i]
   276  		for i := range g.keys {
   277  			g.keys[i] = k
   278  			g.values[i] = v
   279  		}
   280  	}
   281  	m.resident, m.dead = 0, 0
   282  }
   283  
   284  // Count returns the number of elements in the Map.
   285  func (m *Map[K, V]) Count() int {
   286  	return int(m.resident - m.dead)
   287  }
   288  
   289  // Capacity returns the number of additional elements
   290  // the can be added to the Map before resizing.
   291  func (m *Map[K, V]) Capacity() int {
   292  	return int(m.limit - m.resident)
   293  }
   294  
   295  // MaxCapacity returns max number elements
   296  // that can be added to the Map before resizing.
   297  func (m *Map[K, V]) MaxCapacity() int {
   298  	return int(m.limit)
   299  }
   300  
   301  // find returns the location of |key| if present, or its insertion location if absent.
   302  // for performance, find is manually inlined into public methods.
   303  func (m *Map[K, V]) find(key K, hi h1, lo h2) (g, s uint32, ok bool) {
   304  	g = probeStart(hi, len(m.groups))
   305  	for {
   306  		matches := metaMatchH2(&m.ctrl[g], lo)
   307  		for matches != 0 {
   308  			s = nextMatch(&matches)
   309  			if key == m.groups[g].keys[s] {
   310  				return g, s, true
   311  			}
   312  		}
   313  		// |key| is not in group |g|,
   314  		// stop probing if we see an empty slot
   315  		matches = metaMatchEmpty(&m.ctrl[g])
   316  		if matches != 0 {
   317  			s = nextMatch(&matches)
   318  			return g, s, false
   319  		}
   320  		g += 1 // linear probing
   321  		if g >= uint32(len(m.groups)) {
   322  			g = 0
   323  		}
   324  	}
   325  }
   326  
   327  func (m *Map[K, V]) nextSize() (n uint32) {
   328  	n = uint32(len(m.groups)) * 2
   329  	if m.dead >= (m.resident / 2) {
   330  		n = uint32(len(m.groups))
   331  	}
   332  	return
   333  }
   334  
   335  func (m *Map[K, V]) rehash(n uint32) {
   336  	groups, ctrl := m.groups, m.ctrl
   337  	m.groups = make([]group[K, V], n)
   338  	m.ctrl = make([]metadata, n)
   339  	for i := range m.ctrl {
   340  		m.ctrl[i] = newEmptyMetadata()
   341  	}
   342  	m.hash = maphash.NewSeed(m.hash)
   343  	m.limit = n * maxAvgGroupLoad
   344  	m.resident, m.dead = 0, 0
   345  	for g := range ctrl {
   346  		for s := range ctrl[g] {
   347  			c := ctrl[g][s]
   348  			if c == empty || c == tombstone {
   349  				continue
   350  			}
   351  			m.Set(groups[g].keys[s], groups[g].values[s])
   352  		}
   353  	}
   354  }
   355  
   356  func (m *Map[K, V]) loadFactor() float32 {
   357  	slots := float32(len(m.groups) * groupSize)
   358  	return float32(m.resident-m.dead) / slots
   359  }
   360  
   361  // numGroups returns the minimum number of groups needed to store |n| elems.
   362  func numGroups(n uint32) (groups uint32) {
   363  	groups = (n + maxAvgGroupLoad - 1) / maxAvgGroupLoad
   364  	if groups == 0 {
   365  		groups = 1
   366  	}
   367  	return
   368  }
   369  
   370  func newEmptyMetadata() (meta metadata) {
   371  	for i := range meta {
   372  		meta[i] = empty
   373  	}
   374  	return
   375  }
   376  
   377  func splitHash(h uint64) (h1, h2) {
   378  	return h1((h & h1Mask) >> 7), h2(h & h2Mask)
   379  }
   380  
   381  func probeStart(hi h1, groups int) uint32 {
   382  	return fastModN(uint32(hi), uint32(groups))
   383  }
   384  
   385  // lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
   386  func fastModN(x, n uint32) uint32 {
   387  	return uint32((uint64(x) * uint64(n)) >> 32)
   388  }
   389  
   390  // randIntN returns a random number in the interval [0, n).
   391  func randIntN(n int) uint32 {
   392  	return fastModN(fastrand(), uint32(n))
   393  }