github.com/google/skylark@v0.0.0-20181101142754-a5f7082aabed/hashtable.go (about)

     1  // Copyright 2017 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package skylark
     6  
     7  import (
     8  	"fmt"
     9  	_ "unsafe" // for go:linkname hack
    10  )
    11  
// hashtable is used to represent Skylark dict and set values.
// It is a hash table whose key/value entries form a doubly-linked list
// in the order the entries were inserted.
//
// Entries are stored inline in fixed-size buckets. Each element of table
// is the first bucket of a chain linked through bucket.next, and a key's
// chain is selected by masking its hash with len(table)-1. The len field
// counts the entries currently in use. Once frozen is set, insert, delete
// and clear return an error instead of modifying the table.
//
// A hashtable whose table is nil is treated as empty by lookup and delete;
// insert points table at bucket0 on first use.
type hashtable struct {
	table     []bucket  // len is zero or a power of two
	bucket0   [1]bucket // inline allocation for small maps.
	len       uint32
	itercount uint32  // number of active iterators (ignored if frozen)
	head      *entry  // insertion order doubly-linked list; may be nil
	tailLink  **entry // address of nil link at end of list (perhaps &head)
	frozen    bool
}
    24  
// bucketSize is the number of entries stored inline in each bucket.
const bucketSize = 8
    26  
// bucket is a fixed-size group of entries. Buckets that share a table
// index are chained through next; insert appends a new bucket to the end
// of a chain when it finds no unused entry in it.
type bucket struct {
	entries [bucketSize]entry
	next    *bucket // linked list of buckets
}
    31  
// entry is a single key/value slot within a bucket. A slot whose hash is
// zero is unused: insert, lookup and delete all replace a key hash of zero
// by one, and delete resets a removed slot to the zero entry. Entries that
// are in use are also linked into the table's insertion-order list through
// next and prevLink.
type entry struct {
	hash       uint32 // nonzero => in use
	key, value Value
	next       *entry  // insertion order doubly-linked list; may be nil
	prevLink   **entry // address of link to this entry (perhaps &head)
}
    38  
    39  func (ht *hashtable) freeze() {
    40  	if !ht.frozen {
    41  		ht.frozen = true
    42  		for i := range ht.table {
    43  			for p := &ht.table[i]; p != nil; p = p.next {
    44  				for i := range p.entries {
    45  					e := &p.entries[i]
    46  					if e.hash != 0 {
    47  						e.key.Freeze()
    48  						e.value.Freeze()
    49  					}
    50  				}
    51  			}
    52  		}
    53  	}
    54  }
    55  
    56  func (ht *hashtable) insert(k, v Value) error {
    57  	if ht.frozen {
    58  		return fmt.Errorf("cannot insert into frozen hash table")
    59  	}
    60  	if ht.itercount > 0 {
    61  		return fmt.Errorf("cannot insert into hash table during iteration")
    62  	}
    63  	if ht.table == nil {
    64  		ht.table = ht.bucket0[:1]
    65  		ht.tailLink = &ht.head
    66  	}
    67  	h, err := k.Hash()
    68  	if err != nil {
    69  		return err
    70  	}
    71  	if h == 0 {
    72  		h = 1 // zero is reserved
    73  	}
    74  
    75  retry:
    76  	var insert *entry
    77  
    78  	// Inspect each bucket in the bucket list.
    79  	p := &ht.table[h&(uint32(len(ht.table)-1))]
    80  	for {
    81  		for i := range p.entries {
    82  			e := &p.entries[i]
    83  			if e.hash != h {
    84  				if e.hash == 0 {
    85  					// Found empty entry; make a note.
    86  					insert = e
    87  				}
    88  				continue
    89  			}
    90  			if eq, err := Equal(k, e.key); err != nil {
    91  				return err // e.g. excessively recursive tuple
    92  			} else if !eq {
    93  				continue
    94  			}
    95  			// Key already present; update value.
    96  			e.value = v
    97  			return nil
    98  		}
    99  		if p.next == nil {
   100  			break
   101  		}
   102  		p = p.next
   103  	}
   104  
   105  	// Key not found.  p points to the last bucket.
   106  
   107  	// Does the number of elements exceed the buckets' load factor?
   108  	if overloaded(int(ht.len), len(ht.table)) {
   109  		ht.grow()
   110  		goto retry
   111  	}
   112  
   113  	if insert == nil {
   114  		// No space in existing buckets.  Add a new one to the bucket list.
   115  		b := new(bucket)
   116  		p.next = b
   117  		insert = &b.entries[0]
   118  	}
   119  
   120  	// Insert key/value pair.
   121  	insert.hash = h
   122  	insert.key = k
   123  	insert.value = v
   124  
   125  	// Append entry to doubly-linked list.
   126  	insert.prevLink = ht.tailLink
   127  	*ht.tailLink = insert
   128  	ht.tailLink = &insert.next
   129  
   130  	ht.len++
   131  
   132  	return nil
   133  }
   134  
   135  func overloaded(elems, buckets int) bool {
   136  	const loadFactor = 6.5 // just a guess
   137  	return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets)
   138  }
   139  
   140  func (ht *hashtable) grow() {
   141  	// Double the number of buckets and rehash.
   142  	// TODO(adonovan): opt:
   143  	// - avoid reentrant calls to ht.insert, and specialize it.
   144  	//   e.g. we know the calls to Equals will return false since
   145  	//   there are no duplicates among the old keys.
   146  	// - saving the entire hash in the bucket would avoid the need to
   147  	//   recompute the hash.
   148  	// - save the old buckets on a free list.
   149  	ht.table = make([]bucket, len(ht.table)<<1)
   150  	oldhead := ht.head
   151  	ht.head = nil
   152  	ht.tailLink = &ht.head
   153  	ht.len = 0
   154  	for e := oldhead; e != nil; e = e.next {
   155  		ht.insert(e.key, e.value)
   156  	}
   157  	ht.bucket0[0] = bucket{} // clear out unused initial bucket
   158  }
   159  
   160  func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) {
   161  	h, err := k.Hash()
   162  	if err != nil {
   163  		return nil, false, err // unhashable
   164  	}
   165  	if h == 0 {
   166  		h = 1 // zero is reserved
   167  	}
   168  	if ht.table == nil {
   169  		return None, false, nil // empty
   170  	}
   171  
   172  	// Inspect each bucket in the bucket list.
   173  	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
   174  		for i := range p.entries {
   175  			e := &p.entries[i]
   176  			if e.hash == h {
   177  				if eq, err := Equal(k, e.key); err != nil {
   178  					return nil, false, err // e.g. excessively recursive tuple
   179  				} else if eq {
   180  					return e.value, true, nil // found
   181  				}
   182  			}
   183  		}
   184  	}
   185  	return None, false, nil // not found
   186  }
   187  
   188  // Items returns all the items in the map (as key/value pairs) in insertion order.
   189  func (ht *hashtable) items() []Tuple {
   190  	items := make([]Tuple, 0, ht.len)
   191  	array := make([]Value, ht.len*2) // allocate a single backing array
   192  	for e := ht.head; e != nil; e = e.next {
   193  		pair := Tuple(array[:2:2])
   194  		array = array[2:]
   195  		pair[0] = e.key
   196  		pair[1] = e.value
   197  		items = append(items, pair)
   198  	}
   199  	return items
   200  }
   201  
   202  func (ht *hashtable) first() (Value, bool) {
   203  	if ht.head != nil {
   204  		return ht.head.key, true
   205  	}
   206  	return None, false
   207  }
   208  
   209  func (ht *hashtable) keys() []Value {
   210  	keys := make([]Value, 0, ht.len)
   211  	for e := ht.head; e != nil; e = e.next {
   212  		keys = append(keys, e.key)
   213  	}
   214  	return keys
   215  }
   216  
   217  func (ht *hashtable) delete(k Value) (v Value, found bool, err error) {
   218  	if ht.frozen {
   219  		return nil, false, fmt.Errorf("cannot delete from frozen hash table")
   220  	}
   221  	if ht.itercount > 0 {
   222  		return nil, false, fmt.Errorf("cannot delete from hash table during iteration")
   223  	}
   224  	if ht.table == nil {
   225  		return None, false, nil // empty
   226  	}
   227  	h, err := k.Hash()
   228  	if err != nil {
   229  		return nil, false, err // unhashable
   230  	}
   231  	if h == 0 {
   232  		h = 1 // zero is reserved
   233  	}
   234  
   235  	// Inspect each bucket in the bucket list.
   236  	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
   237  		for i := range p.entries {
   238  			e := &p.entries[i]
   239  			if e.hash == h {
   240  				if eq, err := Equal(k, e.key); err != nil {
   241  					return nil, false, err
   242  				} else if eq {
   243  					// Remove e from doubly-linked list.
   244  					*e.prevLink = e.next
   245  					if e.next == nil {
   246  						ht.tailLink = e.prevLink // deletion of last entry
   247  					} else {
   248  						e.next.prevLink = e.prevLink
   249  					}
   250  
   251  					v := e.value
   252  					*e = entry{}
   253  					ht.len--
   254  					return v, true, nil // found
   255  				}
   256  			}
   257  		}
   258  	}
   259  
   260  	// TODO(adonovan): opt: remove completely empty bucket from bucket list.
   261  
   262  	return None, false, nil // not found
   263  }
   264  
   265  func (ht *hashtable) clear() error {
   266  	if ht.frozen {
   267  		return fmt.Errorf("cannot clear frozen hash table")
   268  	}
   269  	if ht.itercount > 0 {
   270  		return fmt.Errorf("cannot clear hash table during iteration")
   271  	}
   272  	if ht.table != nil {
   273  		for i := range ht.table {
   274  			ht.table[i] = bucket{}
   275  		}
   276  	}
   277  	ht.head = nil
   278  	ht.tailLink = &ht.head
   279  	ht.len = 0
   280  	return nil
   281  }
   282  
   283  // dump is provided as an aid to debugging.
   284  func (ht *hashtable) dump() {
   285  	fmt.Printf("hashtable %p len=%d head=%p tailLink=%p",
   286  		ht, ht.len, ht.head, ht.tailLink)
   287  	if ht.tailLink != nil {
   288  		fmt.Printf(" *tailLink=%p", *ht.tailLink)
   289  	}
   290  	fmt.Println()
   291  	for j := range ht.table {
   292  		fmt.Printf("bucket chain %d\n", j)
   293  		for p := &ht.table[j]; p != nil; p = p.next {
   294  			fmt.Printf("bucket %p\n", p)
   295  			for i := range p.entries {
   296  				e := &p.entries[i]
   297  				fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n",
   298  					i, e, e.hash, e.key, e.value)
   299  				fmt.Printf("\t\tnext=%p &next=%p prev=%p",
   300  					e.next, &e.next, e.prevLink)
   301  				if e.prevLink != nil {
   302  					fmt.Printf(" *prev=%p", *e.prevLink)
   303  				}
   304  				fmt.Println()
   305  			}
   306  		}
   307  	}
   308  }
   309  
   310  func (ht *hashtable) iterate() *keyIterator {
   311  	if !ht.frozen {
   312  		ht.itercount++
   313  	}
   314  	return &keyIterator{ht: ht, e: ht.head}
   315  }
   316  
// keyIterator yields the keys of a hashtable by following the table's
// insertion-order list of entries. It is created by hashtable.iterate.
type keyIterator struct {
	ht *hashtable // table being iterated; Done adjusts its itercount
	e  *entry     // next entry to yield, or nil once the list is exhausted
}
   321  
   322  func (it *keyIterator) Next(k *Value) bool {
   323  	if it.e != nil {
   324  		*k = it.e.key
   325  		it.e = it.e.next
   326  		return true
   327  	}
   328  	return false
   329  }
   330  
   331  func (it *keyIterator) Done() {
   332  	if !it.ht.frozen {
   333  		it.ht.itercount--
   334  	}
   335  }
   336  
   337  // hashString computes the hash of s.
   338  func hashString(s string) uint32 {
   339  	if len(s) >= 12 {
   340  		// Call the Go runtime's optimized hash implementation,
   341  		// which uses the AESENC instruction on amd64 machines.
   342  		return uint32(goStringHash(s, 0))
   343  	}
   344  	return softHashString(s)
   345  }
   346  
// goStringHash has no body in this package: the go:linkname directive
// below binds it to the runtime's unexported stringHash function. It
// returns the hash of s computed with the given seed.
//
//go:linkname goStringHash runtime.stringHash
func goStringHash(s string, seed uintptr) uintptr
   349  
   350  // softHashString computes the FNV hash of s in software.
   351  func softHashString(s string) uint32 {
   352  	var h uint32
   353  	for i := 0; i < len(s); i++ {
   354  		h ^= uint32(s[i])
   355  		h *= 16777619
   356  	}
   357  	return h
   358  }