github.com/iainanderson83/datastructures@v0.0.4-0.20191103204413-889e20b53bcf/hashmap/hashmap.go

github.com/iainanderson83/datastructures@v0.0.4-0.20191103204413-889e20b53bcf/hashmap/hashmap.go (about)

     1  // runtime.memhash implementation taken from:
     2  // https://github.com/dgraph-io/ristretto/blob/master/z/rtutil.go
     3  // License can be found:
     4  // https://raw.githubusercontent.com/dgraph-io/ristretto/master/LICENSE
     5  
     6  package hashmap
     7  
     8  import (
     9  	"fmt"
    10  	"strings"
    11  	"sync/atomic"
    12  
    13  	"github.com/cespare/xxhash"
    14  	"github.com/segmentio/fasthash/fnv1a"
    15  )
    16  
    17  var (
    18  	// make them var for tests
    19  	loadFactor = 0.75 // if this goes below 0.5 the bounds won't work
    20  	length     = 3
    21  )
    22  
    23  type entry struct {
    24  	hash  uint64
    25  	key   string
    26  	value interface{}
    27  }
    28  
    29  // Hashmap is a naive implementation of a hashmap struct.
    30  type Hashmap struct {
    31  	lbound int
    32  	ubound int
    33  	length int
    34  	fn     func(string) uint64
    35  
    36  	lock    uintptr
    37  	buckets [][8]entry
    38  }
    39  
    40  // NewFNV1aHashmap returns a hashmap using the fnv1a
    41  // hashing function.
    42  func NewFNV1aHashmap() *Hashmap {
    43  	return NewHashmap(fnv1a.HashString64)
    44  }
    45  
    46  // NewXXHashmap returns a hashmap using the xxhash
    47  // hashing function.
    48  func NewXXHashmap() *Hashmap {
    49  	return NewHashmap(xxhash.Sum64String)
    50  }
    51  
    52  // NewHashmap creates a new, empty, hashmap.
    53  func NewHashmap(fn func(string) uint64) *Hashmap {
    54  	return newWithCap(fn, 1<<length)
    55  }
    56  
    57  func newWithCap(fn func(string) uint64, cap int) *Hashmap {
    58  	h := &Hashmap{
    59  		lbound:  int(float64(int(1)<<length) * (1 - loadFactor)),
    60  		ubound:  int(float64(int(1)<<length) * loadFactor),
    61  		length:  cap,
    62  		buckets: make([][8]entry, cap),
    63  		fn:      fn,
    64  	}
    65  
    66  	if h.lbound == 1<<length || h.ubound == 1<<length {
    67  		panic("invalid load factor")
    68  	}
    69  
    70  	return h
    71  }
    72  
    73  // Add inserts the value v associated with the key k into the hashmap.
    74  // Redistribution of keys occurs if load factor is surpassed.
    75  func (h *Hashmap) Add(k string, v interface{}) bool {
    76  	for {
    77  		if atomic.CompareAndSwapUintptr(&h.lock, 0, 1) {
    78  			break
    79  		}
    80  	}
    81  
    82  	hash := h.fn(k)
    83  	idx := hash & (uint64(h.length) - 1)
    84  
    85  	var (
    86  		exists bool
    87  		length int
    88  		target = -1
    89  	)
    90  	for i := range h.buckets[idx] {
    91  		if h.buckets[idx][i].hash == hash {
    92  			h.buckets[idx][i].value = v
    93  			exists = true
    94  		}
    95  
    96  		if h.buckets[idx][i].hash <= 0 {
    97  			if target == -1 {
    98  				target = i
    99  			}
   100  		} else {
   101  			length++
   102  		}
   103  	}
   104  
   105  	if !exists {
   106  		if target == -1 {
   107  			target = length
   108  		}
   109  		h.buckets[idx][target].hash = hash
   110  		h.buckets[idx][target].key = k
   111  		h.buckets[idx][target].value = v
   112  	}
   113  
   114  	// Assume even distribution
   115  	if length >= h.ubound {
   116  		h.length *= 2
   117  		h.resize()
   118  	}
   119  
   120  	atomic.StoreUintptr(&h.lock, 0)
   121  	return !exists
   122  }
   123  
   124  // Delete removes the key from the map, if it exists,
   125  // and returns whether or not it was deleted.
   126  func (h *Hashmap) Delete(k string) bool {
   127  	for {
   128  		if atomic.CompareAndSwapUintptr(&h.lock, 0, 1) {
   129  			break
   130  		}
   131  	}
   132  
   133  	hash := h.fn(k)
   134  	idx := hash & (uint64(h.length) - 1)
   135  
   136  	var (
   137  		exists bool
   138  		length int
   139  	)
   140  	for i := range h.buckets[idx] {
   141  		if h.buckets[idx][i].hash == hash {
   142  			h.buckets[idx][i].hash = 0
   143  			h.buckets[idx][i].key = ""
   144  			h.buckets[idx][i].value = nil
   145  			exists = true
   146  		}
   147  
   148  		if h.buckets[idx][i].hash > 0 {
   149  			length++
   150  		}
   151  	}
   152  
   153  	if length <= h.lbound {
   154  		h.length /= 2
   155  		h.resize()
   156  	}
   157  
   158  	atomic.StoreUintptr(&h.lock, 0)
   159  	return exists
   160  }
   161  
   162  // Lookup will try to retrieve the value associated with
   163  // the specified key.
   164  func (h *Hashmap) Lookup(k string) (interface{}, bool) {
   165  	for {
   166  		if atomic.CompareAndSwapUintptr(&h.lock, 0, 1) {
   167  			break
   168  		}
   169  	}
   170  
   171  	hash := h.fn(k)
   172  	idx := hash & (uint64(h.length) - 1)
   173  
   174  	for i := range h.buckets[idx] {
   175  		if h.buckets[idx][i].hash == hash {
   176  			atomic.StoreUintptr(&h.lock, 0)
   177  			return h.buckets[idx][i].value, true
   178  		}
   179  	}
   180  
   181  	atomic.StoreUintptr(&h.lock, 0)
   182  	return nil, false
   183  }
   184  
   185  // Iter calls the provided cb for each key/value pair in the map.
   186  func (h *Hashmap) Iter(fn func(k string, v interface{}) bool) {
   187  	if fn == nil {
   188  		return
   189  	}
   190  
   191  	for {
   192  		if atomic.CompareAndSwapUintptr(&h.lock, 0, 1) {
   193  			break
   194  		}
   195  	}
   196  
   197  	for i := range h.buckets {
   198  		for j := range h.buckets[i] {
   199  			if !fn(h.buckets[i][j].key, h.buckets[i][j].value) {
   200  				atomic.StoreUintptr(&h.lock, 0)
   201  				return
   202  			}
   203  		}
   204  	}
   205  
   206  	atomic.StoreUintptr(&h.lock, 0)
   207  }
   208  
   209  // Len returns the number of elements in the map.
   210  func (h *Hashmap) Len() int {
   211  	for {
   212  		if atomic.CompareAndSwapUintptr(&h.lock, 0, 1) {
   213  			break
   214  		}
   215  	}
   216  
   217  	var length int
   218  	for i := range h.buckets {
   219  		for j := range h.buckets[i] {
   220  			if h.buckets[i][j].hash > 0 {
   221  				length++
   222  			}
   223  		}
   224  	}
   225  
   226  	atomic.StoreUintptr(&h.lock, 0)
   227  	return length
   228  }
   229  
   230  func (h *Hashmap) resize() {
   231  	buckets := make([][8]entry, h.length)
   232  
   233  	for i := range h.buckets {
   234  		var length int
   235  		for j := range h.buckets[i] {
   236  			if h.buckets[i][j].hash == 0 {
   237  				continue
   238  			}
   239  			idx := h.buckets[i][j].hash & (uint64(h.length) - 1)
   240  
   241  			buckets[idx][length] = h.buckets[i][j]
   242  			length++
   243  		}
   244  	}
   245  
   246  	h.buckets = buckets
   247  }
   248  
   249  func spew(buckets [][8]entry) string {
   250  	var lengths []int
   251  	for i := range buckets {
   252  		var length int
   253  		for j := range buckets[i] {
   254  			if buckets[i][j].hash != 0 {
   255  				length++
   256  			}
   257  		}
   258  		lengths = append(lengths, int(length))
   259  	}
   260  
   261  	var b strings.Builder
   262  	for i := range lengths {
   263  		b.WriteString(fmt.Sprintf("[%d]", lengths[i]))
   264  	}
   265  	return b.String()
   266  }