github.com/lab47/exprcore@v0.0.0-20210525052339-fb7d6bd9331e/exprcore/hashtable.go (about)

     1  // Copyright 2017 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package exprcore
     6  
     7  import (
     8  	"fmt"
     9  	_ "unsafe" // for go:linkname hack
    10  )
    11  
    12  // hashtable is used to represent exprcore dict and set values.
    13  // It is a hash table whose key/value entries form a doubly-linked list
    14  // in the order the entries were inserted.
    15  type hashtable struct {
    16  	table     []bucket  // len is zero or a power of two
    17  	bucket0   [1]bucket // inline allocation for small maps.
    18  	len       uint32
    19  	itercount uint32  // number of active iterators (ignored if frozen)
    20  	head      *entry  // insertion order doubly-linked list; may be nil
    21  	tailLink  **entry // address of nil link at end of list (perhaps &head)
    22  	frozen    bool
    23  }
    24  
    25  const bucketSize = 8
    26  
    27  type bucket struct {
    28  	entries [bucketSize]entry
    29  	next    *bucket // linked list of buckets
    30  }
    31  
    32  type entry struct {
    33  	hash       uint32 // nonzero => in use
    34  	key, value Value
    35  	next       *entry  // insertion order doubly-linked list; may be nil
    36  	prevLink   **entry // address of link to this entry (perhaps &head)
    37  }
    38  
    39  func (ht *hashtable) init(size int) {
    40  	if size < 0 {
    41  		panic("size < 0")
    42  	}
    43  	nb := 1
    44  	for overloaded(size, nb) {
    45  		nb = nb << 1
    46  	}
    47  	if nb < 2 {
    48  		ht.table = ht.bucket0[:1]
    49  	} else {
    50  		ht.table = make([]bucket, nb)
    51  	}
    52  	ht.tailLink = &ht.head
    53  }
    54  
    55  func (ht *hashtable) freeze() {
    56  	if !ht.frozen {
    57  		ht.frozen = true
    58  		for i := range ht.table {
    59  			for p := &ht.table[i]; p != nil; p = p.next {
    60  				for i := range p.entries {
    61  					e := &p.entries[i]
    62  					if e.hash != 0 {
    63  						e.key.Freeze()
    64  						e.value.Freeze()
    65  					}
    66  				}
    67  			}
    68  		}
    69  	}
    70  }
    71  
    72  func (ht *hashtable) insert(k, v Value) error {
    73  	if ht.frozen {
    74  		return fmt.Errorf("cannot insert into frozen hash table")
    75  	}
    76  	if ht.itercount > 0 {
    77  		return fmt.Errorf("cannot insert into hash table during iteration")
    78  	}
    79  	if ht.table == nil {
    80  		ht.init(1)
    81  	}
    82  	h, err := k.Hash()
    83  	if err != nil {
    84  		return err
    85  	}
    86  	if h == 0 {
    87  		h = 1 // zero is reserved
    88  	}
    89  
    90  retry:
    91  	var insert *entry
    92  
    93  	// Inspect each bucket in the bucket list.
    94  	p := &ht.table[h&(uint32(len(ht.table)-1))]
    95  	for {
    96  		for i := range p.entries {
    97  			e := &p.entries[i]
    98  			if e.hash != h {
    99  				if e.hash == 0 {
   100  					// Found empty entry; make a note.
   101  					insert = e
   102  				}
   103  				continue
   104  			}
   105  			if eq, err := Equal(k, e.key); err != nil {
   106  				return err // e.g. excessively recursive tuple
   107  			} else if !eq {
   108  				continue
   109  			}
   110  			// Key already present; update value.
   111  			e.value = v
   112  			return nil
   113  		}
   114  		if p.next == nil {
   115  			break
   116  		}
   117  		p = p.next
   118  	}
   119  
   120  	// Key not found.  p points to the last bucket.
   121  
   122  	// Does the number of elements exceed the buckets' load factor?
   123  	if overloaded(int(ht.len), len(ht.table)) {
   124  		ht.grow()
   125  		goto retry
   126  	}
   127  
   128  	if insert == nil {
   129  		// No space in existing buckets.  Add a new one to the bucket list.
   130  		b := new(bucket)
   131  		p.next = b
   132  		insert = &b.entries[0]
   133  	}
   134  
   135  	// Insert key/value pair.
   136  	insert.hash = h
   137  	insert.key = k
   138  	insert.value = v
   139  
   140  	// Append entry to doubly-linked list.
   141  	insert.prevLink = ht.tailLink
   142  	*ht.tailLink = insert
   143  	ht.tailLink = &insert.next
   144  
   145  	ht.len++
   146  
   147  	return nil
   148  }
   149  
   150  func overloaded(elems, buckets int) bool {
   151  	const loadFactor = 6.5 // just a guess
   152  	return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets)
   153  }
   154  
   155  func (ht *hashtable) grow() {
   156  	// Double the number of buckets and rehash.
   157  	// TODO(adonovan): opt:
   158  	// - avoid reentrant calls to ht.insert, and specialize it.
   159  	//   e.g. we know the calls to Equals will return false since
   160  	//   there are no duplicates among the old keys.
   161  	// - saving the entire hash in the bucket would avoid the need to
   162  	//   recompute the hash.
   163  	// - save the old buckets on a free list.
   164  	ht.table = make([]bucket, len(ht.table)<<1)
   165  	oldhead := ht.head
   166  	ht.head = nil
   167  	ht.tailLink = &ht.head
   168  	ht.len = 0
   169  	for e := oldhead; e != nil; e = e.next {
   170  		ht.insert(e.key, e.value)
   171  	}
   172  	ht.bucket0[0] = bucket{} // clear out unused initial bucket
   173  }
   174  
   175  func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) {
   176  	h, err := k.Hash()
   177  	if err != nil {
   178  		return nil, false, err // unhashable
   179  	}
   180  	if h == 0 {
   181  		h = 1 // zero is reserved
   182  	}
   183  	if ht.table == nil {
   184  		return None, false, nil // empty
   185  	}
   186  
   187  	// Inspect each bucket in the bucket list.
   188  	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
   189  		for i := range p.entries {
   190  			e := &p.entries[i]
   191  			if e.hash == h {
   192  				if eq, err := Equal(k, e.key); err != nil {
   193  					return nil, false, err // e.g. excessively recursive tuple
   194  				} else if eq {
   195  					return e.value, true, nil // found
   196  				}
   197  			}
   198  		}
   199  	}
   200  	return None, false, nil // not found
   201  }
   202  
   203  // Items returns all the items in the map (as key/value pairs) in insertion order.
   204  func (ht *hashtable) items() []Tuple {
   205  	items := make([]Tuple, 0, ht.len)
   206  	array := make([]Value, ht.len*2) // allocate a single backing array
   207  	for e := ht.head; e != nil; e = e.next {
   208  		pair := Tuple(array[:2:2])
   209  		array = array[2:]
   210  		pair[0] = e.key
   211  		pair[1] = e.value
   212  		items = append(items, pair)
   213  	}
   214  	return items
   215  }
   216  
   217  func (ht *hashtable) first() (Value, bool) {
   218  	if ht.head != nil {
   219  		return ht.head.key, true
   220  	}
   221  	return None, false
   222  }
   223  
   224  func (ht *hashtable) keys() []Value {
   225  	keys := make([]Value, 0, ht.len)
   226  	for e := ht.head; e != nil; e = e.next {
   227  		keys = append(keys, e.key)
   228  	}
   229  	return keys
   230  }
   231  
   232  func (ht *hashtable) values() []Value {
   233  	values := make([]Value, 0, ht.len)
   234  	for e := ht.head; e != nil; e = e.next {
   235  		values = append(values, e.value)
   236  	}
   237  	return values
   238  }
   239  
   240  func (ht *hashtable) delete(k Value) (v Value, found bool, err error) {
   241  	if ht.frozen {
   242  		return nil, false, fmt.Errorf("cannot delete from frozen hash table")
   243  	}
   244  	if ht.itercount > 0 {
   245  		return nil, false, fmt.Errorf("cannot delete from hash table during iteration")
   246  	}
   247  	if ht.table == nil {
   248  		return None, false, nil // empty
   249  	}
   250  	h, err := k.Hash()
   251  	if err != nil {
   252  		return nil, false, err // unhashable
   253  	}
   254  	if h == 0 {
   255  		h = 1 // zero is reserved
   256  	}
   257  
   258  	// Inspect each bucket in the bucket list.
   259  	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
   260  		for i := range p.entries {
   261  			e := &p.entries[i]
   262  			if e.hash == h {
   263  				if eq, err := Equal(k, e.key); err != nil {
   264  					return nil, false, err
   265  				} else if eq {
   266  					// Remove e from doubly-linked list.
   267  					*e.prevLink = e.next
   268  					if e.next == nil {
   269  						ht.tailLink = e.prevLink // deletion of last entry
   270  					} else {
   271  						e.next.prevLink = e.prevLink
   272  					}
   273  
   274  					v := e.value
   275  					*e = entry{}
   276  					ht.len--
   277  					return v, true, nil // found
   278  				}
   279  			}
   280  		}
   281  	}
   282  
   283  	// TODO(adonovan): opt: remove completely empty bucket from bucket list.
   284  
   285  	return None, false, nil // not found
   286  }
   287  
   288  func (ht *hashtable) clear() error {
   289  	if ht.frozen {
   290  		return fmt.Errorf("cannot clear frozen hash table")
   291  	}
   292  	if ht.itercount > 0 {
   293  		return fmt.Errorf("cannot clear hash table during iteration")
   294  	}
   295  	if ht.table != nil {
   296  		for i := range ht.table {
   297  			ht.table[i] = bucket{}
   298  		}
   299  	}
   300  	ht.head = nil
   301  	ht.tailLink = &ht.head
   302  	ht.len = 0
   303  	return nil
   304  }
   305  
   306  // dump is provided as an aid to debugging.
   307  func (ht *hashtable) dump() {
   308  	fmt.Printf("hashtable %p len=%d head=%p tailLink=%p",
   309  		ht, ht.len, ht.head, ht.tailLink)
   310  	if ht.tailLink != nil {
   311  		fmt.Printf(" *tailLink=%p", *ht.tailLink)
   312  	}
   313  	fmt.Println()
   314  	for j := range ht.table {
   315  		fmt.Printf("bucket chain %d\n", j)
   316  		for p := &ht.table[j]; p != nil; p = p.next {
   317  			fmt.Printf("bucket %p\n", p)
   318  			for i := range p.entries {
   319  				e := &p.entries[i]
   320  				fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n",
   321  					i, e, e.hash, e.key, e.value)
   322  				fmt.Printf("\t\tnext=%p &next=%p prev=%p",
   323  					e.next, &e.next, e.prevLink)
   324  				if e.prevLink != nil {
   325  					fmt.Printf(" *prev=%p", *e.prevLink)
   326  				}
   327  				fmt.Println()
   328  			}
   329  		}
   330  	}
   331  }
   332  
   333  func (ht *hashtable) iterate() *keyIterator {
   334  	if !ht.frozen {
   335  		ht.itercount++
   336  	}
   337  	return &keyIterator{ht: ht, e: ht.head}
   338  }
   339  
   340  type keyIterator struct {
   341  	ht *hashtable
   342  	e  *entry
   343  }
   344  
   345  func (it *keyIterator) Next(k *Value) bool {
   346  	if it.e != nil {
   347  		*k = it.e.key
   348  		it.e = it.e.next
   349  		return true
   350  	}
   351  	return false
   352  }
   353  
   354  func (it *keyIterator) Done() {
   355  	if !it.ht.frozen {
   356  		it.ht.itercount--
   357  	}
   358  }
   359  
   360  // hashString computes the hash of s.
   361  func hashString(s string) uint32 {
   362  	if len(s) >= 12 {
   363  		// Call the Go runtime's optimized hash implementation,
   364  		// which uses the AESENC instruction on amd64 machines.
   365  		return uint32(goStringHash(s, 0))
   366  	}
   367  	return softHashString(s)
   368  }
   369  
   370  //go:linkname goStringHash runtime.stringHash
   371  func goStringHash(s string, seed uintptr) uintptr
   372  
   373  // softHashString computes the FNV hash of s in software.
   374  func softHashString(s string) uint32 {
   375  	var h uint32
   376  	for i := 0; i < len(s); i++ {
   377  		h ^= uint32(s[i])
   378  		h *= 16777619
   379  	}
   380  	return h
   381  }