github.com/dolthub/swiss@v0.2.2-0.20240312182618-f4b2babd2bc1/map.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package swiss
    16  
    17  import (
    18  	"github.com/dolthub/maphash"
    19  )
    20  
const (
	// maxLoadFactor is the ratio of maxAvgGroupLoad to groupSize (both
	// declared outside this file), expressed as a float32. Since the Map's
	// rehash limit is computed as groups * maxAvgGroupLoad, this is the
	// fraction of all slots that may be resident before a rehash.
	maxLoadFactor = float32(maxAvgGroupLoad) / float32(groupSize)
)
    24  
// Map is an open-addressing hash map
// based on Abseil's flat_hash_map.
//
// Slots are organized into fixed-size groups; ctrl[i] holds the control
// bytes describing the slots of groups[i].
type Map[K comparable, V any] struct {
	// ctrl holds one metadata array per group. Each control byte is either
	// |empty|, |tombstone|, or the h2 hash suffix of the key in that slot.
	ctrl     []metadata
	// groups holds the keys and values; it always has the same length as ctrl.
	groups   []group[K, V]
	// hash is the hasher used for keys; it is re-seeded on every rehash.
	hash     maphash.Hasher[K]
	// resident counts slots that are not |empty|, i.e. live entries
	// plus tombstones.
	resident uint32
	// dead counts tombstoned slots.
	dead     uint32
	// limit is the value of resident at which Put triggers a rehash.
	limit    uint32
}
    35  
// metadata is the h2 metadata array for a group: one control byte per slot.
// Find operations first probe the control bytes to filter candidate slots
// before comparing full keys.
type metadata [groupSize]int8
    40  
// group is a group of groupSize key-value pairs.
// keys[i] and values[i] together form slot i of the group.
type group[K comparable, V any] struct {
	keys   [groupSize]K
	values [groupSize]V
}
    46  
const (
	// h1Mask selects the upper 57 bits of a 64-bit hash.
	h1Mask    uint64 = 0xffff_ffff_ffff_ff80
	// h2Mask selects the lower 7 bits of a 64-bit hash.
	h2Mask    uint64 = 0x0000_0000_0000_007f
	// empty and tombstone are sentinel control bytes. Both are negative,
	// so they can never equal a stored h2 value, which is always in [0, 127].
	empty     int8   = -128 // 0b1000_0000
	tombstone int8   = -2   // 0b1111_1110
)
    53  
// h1 is a 57 bit hash prefix: the upper bits of the 64-bit hash,
// shifted right by 7. It is used to choose the starting probe group.
type h1 uint64
    56  
// h2 is a 7 bit hash suffix: the lower 7 bits of the 64-bit hash.
// It is stored in the control byte of an occupied slot.
type h2 int8
    59  
    60  // NewMap constructs a Map.
    61  func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) {
    62  	groups := numGroups(sz)
    63  	m = &Map[K, V]{
    64  		ctrl:   make([]metadata, groups),
    65  		groups: make([]group[K, V], groups),
    66  		hash:   maphash.NewHasher[K](),
    67  		limit:  groups * maxAvgGroupLoad,
    68  	}
    69  	for i := range m.ctrl {
    70  		m.ctrl[i] = newEmptyMetadata()
    71  	}
    72  	return
    73  }
    74  
    75  // Has returns true if |key| is present in |m|.
    76  func (m *Map[K, V]) Has(key K) (ok bool) {
    77  	hi, lo := splitHash(m.hash.Hash(key))
    78  	g := probeStart(hi, len(m.groups))
    79  	for { // inlined find loop
    80  		matches := metaMatchH2(&m.ctrl[g], lo)
    81  		for matches != 0 {
    82  			s := nextMatch(&matches)
    83  			if key == m.groups[g].keys[s] {
    84  				ok = true
    85  				return
    86  			}
    87  		}
    88  		// |key| is not in group |g|,
    89  		// stop probing if we see an empty slot
    90  		matches = metaMatchEmpty(&m.ctrl[g])
    91  		if matches != 0 {
    92  			ok = false
    93  			return
    94  		}
    95  		g += 1 // linear probing
    96  		if g >= uint32(len(m.groups)) {
    97  			g = 0
    98  		}
    99  	}
   100  }
   101  
   102  // Get returns the |value| mapped by |key| if one exists.
   103  func (m *Map[K, V]) Get(key K) (value V, ok bool) {
   104  	hi, lo := splitHash(m.hash.Hash(key))
   105  	g := probeStart(hi, len(m.groups))
   106  	for { // inlined find loop
   107  		matches := metaMatchH2(&m.ctrl[g], lo)
   108  		for matches != 0 {
   109  			s := nextMatch(&matches)
   110  			if key == m.groups[g].keys[s] {
   111  				value, ok = m.groups[g].values[s], true
   112  				return
   113  			}
   114  		}
   115  		// |key| is not in group |g|,
   116  		// stop probing if we see an empty slot
   117  		matches = metaMatchEmpty(&m.ctrl[g])
   118  		if matches != 0 {
   119  			ok = false
   120  			return
   121  		}
   122  		g += 1 // linear probing
   123  		if g >= uint32(len(m.groups)) {
   124  			g = 0
   125  		}
   126  	}
   127  }
   128  
   129  // Put attempts to insert |key| and |value|
   130  func (m *Map[K, V]) Put(key K, value V) {
   131  	if m.resident >= m.limit {
   132  		m.rehash(m.nextSize())
   133  	}
   134  	hi, lo := splitHash(m.hash.Hash(key))
   135  	g := probeStart(hi, len(m.groups))
   136  	for { // inlined find loop
   137  		matches := metaMatchH2(&m.ctrl[g], lo)
   138  		for matches != 0 {
   139  			s := nextMatch(&matches)
   140  			if key == m.groups[g].keys[s] { // update
   141  				m.groups[g].keys[s] = key
   142  				m.groups[g].values[s] = value
   143  				return
   144  			}
   145  		}
   146  		// |key| is not in group |g|,
   147  		// stop probing if we see an empty slot
   148  		matches = metaMatchEmpty(&m.ctrl[g])
   149  		if matches != 0 { // insert
   150  			s := nextMatch(&matches)
   151  			m.groups[g].keys[s] = key
   152  			m.groups[g].values[s] = value
   153  			m.ctrl[g][s] = int8(lo)
   154  			m.resident++
   155  			return
   156  		}
   157  		g += 1 // linear probing
   158  		if g >= uint32(len(m.groups)) {
   159  			g = 0
   160  		}
   161  	}
   162  }
   163  
   164  // Delete attempts to remove |key|, returns true successful.
   165  func (m *Map[K, V]) Delete(key K) (ok bool) {
   166  	hi, lo := splitHash(m.hash.Hash(key))
   167  	g := probeStart(hi, len(m.groups))
   168  	for {
   169  		matches := metaMatchH2(&m.ctrl[g], lo)
   170  		for matches != 0 {
   171  			s := nextMatch(&matches)
   172  			if key == m.groups[g].keys[s] {
   173  				ok = true
   174  				// optimization: if |m.ctrl[g]| contains any empty
   175  				// metadata bytes, we can physically delete |key|
   176  				// rather than placing a tombstone.
   177  				// The observation is that any probes into group |g|
   178  				// would already be terminated by the existing empty
   179  				// slot, and therefore reclaiming slot |s| will not
   180  				// cause premature termination of probes into |g|.
   181  				if metaMatchEmpty(&m.ctrl[g]) != 0 {
   182  					m.ctrl[g][s] = empty
   183  					m.resident--
   184  				} else {
   185  					m.ctrl[g][s] = tombstone
   186  					m.dead++
   187  				}
   188  				var k K
   189  				var v V
   190  				m.groups[g].keys[s] = k
   191  				m.groups[g].values[s] = v
   192  				return
   193  			}
   194  		}
   195  		// |key| is not in group |g|,
   196  		// stop probing if we see an empty slot
   197  		matches = metaMatchEmpty(&m.ctrl[g])
   198  		if matches != 0 { // |key| absent
   199  			ok = false
   200  			return
   201  		}
   202  		g += 1 // linear probing
   203  		if g >= uint32(len(m.groups)) {
   204  			g = 0
   205  		}
   206  	}
   207  }
   208  
   209  // Iter iterates the elements of the Map, passing them to the callback.
   210  // It guarantees that any key in the Map will be visited only once, and
   211  // for un-mutated Maps, every key will be visited once. If the Map is
   212  // Mutated during iteration, mutations will be reflected on return from
   213  // Iter, but the set of keys visited by Iter is non-deterministic.
   214  func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) {
   215  	// take a consistent view of the table in case
   216  	// we rehash during iteration
   217  	ctrl, groups := m.ctrl, m.groups
   218  	// pick a random starting group
   219  	g := randIntN(len(groups))
   220  	for n := 0; n < len(groups); n++ {
   221  		for s, c := range ctrl[g] {
   222  			if c == empty || c == tombstone {
   223  				continue
   224  			}
   225  			k, v := groups[g].keys[s], groups[g].values[s]
   226  			if stop := cb(k, v); stop {
   227  				return
   228  			}
   229  		}
   230  		g++
   231  		if g >= uint32(len(groups)) {
   232  			g = 0
   233  		}
   234  	}
   235  }
   236  
   237  // Clear removes all elements from the Map.
   238  func (m *Map[K, V]) Clear() {
   239  	for i, c := range m.ctrl {
   240  		for j := range c {
   241  			m.ctrl[i][j] = empty
   242  		}
   243  	}
   244  	var k K
   245  	var v V
   246  	for i := range m.groups {
   247  		g := &m.groups[i]
   248  		for i := range g.keys {
   249  			g.keys[i] = k
   250  			g.values[i] = v
   251  		}
   252  	}
   253  	m.resident, m.dead = 0, 0
   254  }
   255  
   256  // Count returns the number of elements in the Map.
   257  func (m *Map[K, V]) Count() int {
   258  	return int(m.resident - m.dead)
   259  }
   260  
   261  // Capacity returns the number of additional elements
   262  // the can be added to the Map before resizing.
   263  func (m *Map[K, V]) Capacity() int {
   264  	return int(m.limit - m.resident)
   265  }
   266  
   267  // find returns the location of |key| if present, or its insertion location if absent.
   268  // for performance, find is manually inlined into public methods.
   269  func (m *Map[K, V]) find(key K, hi h1, lo h2) (g, s uint32, ok bool) {
   270  	g = probeStart(hi, len(m.groups))
   271  	for {
   272  		matches := metaMatchH2(&m.ctrl[g], lo)
   273  		for matches != 0 {
   274  			s = nextMatch(&matches)
   275  			if key == m.groups[g].keys[s] {
   276  				return g, s, true
   277  			}
   278  		}
   279  		// |key| is not in group |g|,
   280  		// stop probing if we see an empty slot
   281  		matches = metaMatchEmpty(&m.ctrl[g])
   282  		if matches != 0 {
   283  			s = nextMatch(&matches)
   284  			return g, s, false
   285  		}
   286  		g += 1 // linear probing
   287  		if g >= uint32(len(m.groups)) {
   288  			g = 0
   289  		}
   290  	}
   291  }
   292  
   293  func (m *Map[K, V]) nextSize() (n uint32) {
   294  	n = uint32(len(m.groups)) * 2
   295  	if m.dead >= (m.resident / 2) {
   296  		n = uint32(len(m.groups))
   297  	}
   298  	return
   299  }
   300  
   301  func (m *Map[K, V]) rehash(n uint32) {
   302  	groups, ctrl := m.groups, m.ctrl
   303  	m.groups = make([]group[K, V], n)
   304  	m.ctrl = make([]metadata, n)
   305  	for i := range m.ctrl {
   306  		m.ctrl[i] = newEmptyMetadata()
   307  	}
   308  	m.hash = maphash.NewSeed(m.hash)
   309  	m.limit = n * maxAvgGroupLoad
   310  	m.resident, m.dead = 0, 0
   311  	for g := range ctrl {
   312  		for s := range ctrl[g] {
   313  			c := ctrl[g][s]
   314  			if c == empty || c == tombstone {
   315  				continue
   316  			}
   317  			m.Put(groups[g].keys[s], groups[g].values[s])
   318  		}
   319  	}
   320  }
   321  
   322  func (m *Map[K, V]) loadFactor() float32 {
   323  	slots := float32(len(m.groups) * groupSize)
   324  	return float32(m.resident-m.dead) / slots
   325  }
   326  
   327  // numGroups returns the minimum number of groups needed to store |n| elems.
   328  func numGroups(n uint32) (groups uint32) {
   329  	groups = (n + maxAvgGroupLoad - 1) / maxAvgGroupLoad
   330  	if groups == 0 {
   331  		groups = 1
   332  	}
   333  	return
   334  }
   335  
   336  func newEmptyMetadata() (meta metadata) {
   337  	for i := range meta {
   338  		meta[i] = empty
   339  	}
   340  	return
   341  }
   342  
   343  func splitHash(h uint64) (h1, h2) {
   344  	return h1((h & h1Mask) >> 7), h2(h & h2Mask)
   345  }
   346  
   347  func probeStart(hi h1, groups int) uint32 {
   348  	return fastModN(uint32(hi), uint32(groups))
   349  }
   350  
   351  // lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
   352  func fastModN(x, n uint32) uint32 {
   353  	return uint32((uint64(x) * uint64(n)) >> 32)
   354  }