github.com/scottcagno/storage@v1.8.0/pkg/hashmap/chained/chhmap.go (about)

     1  package chained
     2  
     3  import "github.com/scottcagno/storage/pkg/hash/murmur3"
     4  
     5  const (
     6  	loadFactor     = 0.85 // load factor must exceed 50%
     7  	defaultMapSize = 16
     8  )
     9  
    10  // user specified key and value types
    11  type keyType = string
    12  type valType = []byte
    13  
    14  var keyZeroType = *new(keyType)
    15  var valZeroType = *new(valType)
    16  
    17  // entry is a key value pair that is found in each bucket
    18  type entry struct {
    19  	key keyType
    20  	val valType
    21  }
    22  
    23  // entry node is a node in part of our linked list
    24  type entryNode struct {
    25  	entry
    26  	next *entryNode
    27  }
    28  
    29  // bucket represents a single slot in the HashMap table
    30  type bucket struct {
    31  	hashkey uint64
    32  	head    *entryNode
    33  }
    34  
    35  func (b *bucket) insert(key keyType, val valType) (valType, bool) {
    36  	if val, ok := b.search(key); ok {
    37  		// already exists
    38  		return val, true
    39  	}
    40  	newNode := &entryNode{
    41  		entry: entry{
    42  			key: key,
    43  			val: val,
    44  		},
    45  		next: b.head,
    46  	}
    47  	b.head = newNode
    48  	// no previous value, so return false
    49  	return b.head.entry.val, false
    50  }
    51  
    52  func (b *bucket) search(key keyType) (valType, bool) {
    53  	current := b.head
    54  	for current != nil {
    55  		if current.entry.key == key {
    56  			return current.entry.val, true
    57  		}
    58  		current = current.next
    59  	}
    60  	return valZeroType, false
    61  }
    62  
    63  func (b *bucket) scan(it Iterator) {
    64  	current := b.head
    65  	for current != nil {
    66  		if !it(current.entry.key, current.entry.val) {
    67  			return
    68  		}
    69  		current = current.next
    70  	}
    71  }
    72  
    73  func (b *bucket) delete(key keyType) (valType, bool) {
    74  	var ret valType
    75  	if b.head.entry.key == key {
    76  		ret = b.head.entry.val
    77  		b.head = b.head.next
    78  		return ret, true
    79  	}
    80  	previous := b.head
    81  	for previous.next != nil {
    82  		if previous.next.entry.key == key {
    83  			ret = previous.next.entry.val
    84  			previous.next = previous.next.next
    85  			return ret, true
    86  		}
    87  		previous = previous.next
    88  	}
    89  	return valZeroType, false
    90  }
    91  
    92  // HashMap represents a closed hashing hashtable implementation
    93  type HashMap struct {
    94  	hash    hashFunc
    95  	mask    uint64
    96  	expand  uint
    97  	shrink  uint
    98  	keys    uint
    99  	size    uint
   100  	buckets []bucket
   101  }
   102  
   103  // alignBucketCount aligns buckets to ensure all sizes are powers of two
   104  func alignBucketCount(size uint) uint64 {
   105  	count := uint(defaultMapSize)
   106  	for count < size {
   107  		count *= 2
   108  	}
   109  	return uint64(count)
   110  }
   111  
   112  // defaultHashFunc is the default hashFunc used. This is here mainly as
   113  // a convenience for the sharded hashmap to utilize
   114  func defaultHashFunc(key keyType) uint64 {
   115  	return murmur3.Sum64([]byte(key))
   116  }
   117  
   118  // hashFunc is a type definition for what a hash function should look like
   119  type hashFunc func(key keyType) uint64
   120  
   121  // NewHashMap returns a new HashMap instantiated with the specified size or
   122  // the defaultMapSize, whichever is larger
   123  func NewHashMap(size uint) *HashMap {
   124  	return newHashMap(size, defaultHashFunc)
   125  }
   126  
   127  // newHashMap is the internal variant of the previous function
   128  // and is mainly used internally
   129  func newHashMap(size uint, hash hashFunc) *HashMap {
   130  	bukCnt := alignBucketCount(size)
   131  	if hash == nil {
   132  		hash = defaultHashFunc
   133  	}
   134  	m := &HashMap{
   135  		hash:    hash,
   136  		mask:    bukCnt - 1,
   137  		expand:  uint(float64(bukCnt) * loadFactor),
   138  		shrink:  uint(float64(bukCnt) * (1 - loadFactor)),
   139  		keys:    0,
   140  		size:    size,
   141  		buckets: make([]bucket, bukCnt),
   142  	}
   143  	return m
   144  }
   145  
   146  // resize grows or shrinks the HashMap by the newSize provided. It makes a
   147  // new map with the new size, copies everything over, and then frees the old map
   148  func (m *HashMap) resize(newSize uint) {
   149  	newHM := newHashMap(newSize, m.hash)
   150  	var buk bucket
   151  	for i := 0; i < len(m.buckets); i++ {
   152  		buk = m.buckets[i]
   153  		if buk.head != nil {
   154  			buk.scan(func(key keyType, value valType) bool {
   155  				newHM.insert(buk.hashkey, key, value)
   156  				return true
   157  			})
   158  		}
   159  	}
   160  	tsize := m.size
   161  	*m = *newHM
   162  	m.size = tsize
   163  }
   164  
   165  // Get returns a value for a given key, or returns false if none could be found
   166  // Get can be considered the exported version of the lookup call
   167  func (m *HashMap) Get(key keyType) (valType, bool) {
   168  	return m.lookup(0, key)
   169  }
   170  
   171  // lookup returns a value for a given key, or returns false if none could be found
   172  func (m *HashMap) lookup(hashkey uint64, key keyType) (valType, bool) {
   173  	// check if map is empty
   174  	if len(m.buckets) == 0 {
   175  		// hopefully this should never really happen
   176  		// do we really need to check this here?
   177  		*m = *newHashMap(defaultMapSize, m.hash)
   178  	}
   179  	if hashkey == 0 {
   180  		// calculate the hashkey value
   181  		hashkey = m.hash(key)
   182  	}
   183  	// mask the hashkey to get the initial index
   184  	i := hashkey & m.mask
   185  	// check if the chain is empty
   186  	if m.buckets[i].head == nil {
   187  		return *new(valType), false
   188  	}
   189  	// not empty, lets look for it in the list
   190  	return m.buckets[i].search(key)
   191  }
   192  
   193  // Put inserts a key value entry and returns the previous value or false
   194  // Put can be considered the exported version of the insert call
   195  func (m *HashMap) Put(key keyType, value valType) (valType, bool) {
   196  	return m.insert(0, key, value)
   197  }
   198  
   199  // insert inserts a key value entry and returns the previous value, or false
   200  func (m *HashMap) insert(hashkey uint64, key keyType, value valType) (valType, bool) {
   201  	// check if map is empty
   202  	if len(m.buckets) == 0 {
   203  		// create a new map with default size
   204  		*m = *newHashMap(defaultMapSize, m.hash)
   205  	}
   206  	// check and see if we need to resize
   207  	if m.keys >= m.expand {
   208  		// if we do, then double the map size
   209  		m.resize(uint(len(m.buckets)) * 2)
   210  	}
   211  	if hashkey == 0 {
   212  		// calculate the hashkey value
   213  		hashkey = m.hash(key)
   214  	}
   215  	// mask the hashkey to get the initial index
   216  	i := hashkey & m.mask
   217  	// insert key and value
   218  	val, ok := m.buckets[i].insert(key, value)
   219  	if !ok { // means not updated, aka a new one was inserted
   220  		m.keys++
   221  	}
   222  	return val, !ok
   223  }
   224  
   225  // Del removes a value for a given key and returns the deleted value, or false
   226  // Del can be considered the exported version of the delete call
   227  func (m *HashMap) Del(key keyType) (valType, bool) {
   228  	return m.delete(0, key)
   229  }
   230  
   231  // delete removes a value for a given key and returns the deleted value, or false
   232  func (m *HashMap) delete(hashkey uint64, key keyType) (valType, bool) {
   233  	// check if map is empty
   234  	if len(m.buckets) == 0 {
   235  		// nothing to see here folks
   236  		return valZeroType, false
   237  	}
   238  	if hashkey == 0 {
   239  		// calculate the hashkey value
   240  		hashkey = m.hash(key)
   241  	}
   242  	// mask the hashkey to get the initial index
   243  	i := hashkey & m.mask
   244  	// try deleting from the chain
   245  	val, ok := m.buckets[i].delete(key)
   246  	if ok { // means it was deleted, aka...
   247  		// ...decrement entry count
   248  		m.keys--
   249  	}
   250  	// check and see if we need to resize
   251  	if m.keys <= m.shrink && uint(len(m.buckets)) > m.size {
   252  		// if it checks out, then resize down by 25%-ish
   253  		m.resize(m.keys)
   254  	}
   255  	return val, ok
   256  }
   257  
   258  // Iterator is an iterator function type
   259  type Iterator func(key keyType, value valType) bool
   260  
   261  // Range takes an Iterator and ranges the HashMap as long as long
   262  // as the iterator function continues to be true. Range is not
   263  // safe to perform an insert or remove operation while ranging!
   264  func (m *HashMap) Range(it Iterator) {
   265  	for i := 0; i < len(m.buckets); i++ {
   266  		m.buckets[i].scan(it)
   267  	}
   268  }
   269  
   270  // PercentFull returns the current load factor of the HashMap
   271  func (m *HashMap) PercentFull() float64 {
   272  	return float64(m.keys) / float64(len(m.buckets))
   273  }
   274  
   275  // Len returns the number of entries currently in the HashMap
   276  func (m *HashMap) Len() int {
   277  	return int(m.keys)
   278  }
   279  
   280  // Close closes and frees the current hashmap. Calling any method
   281  // on the HashMap after this will most likely result in a panic
   282  func (m *HashMap) Close() {
   283  	destroy(m)
   284  }
   285  
   286  // destroy does exactly what is sounds like it does
   287  func destroy(m *HashMap) {
   288  	m = nil
   289  }