go.starlark.net@v0.0.0-20231101134539-556fd59b42f6/starlark/hashtable.go (about)

     1  // Copyright 2017 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package starlark
     6  
     7  import (
     8  	"fmt"
     9  	"math/big"
    10  	_ "unsafe" // for go:linkname hack
    11  )
    12  
// hashtable is used to represent Starlark dict and set values.
// It is a hash table whose key/value entries form a doubly-linked list
// in the order the entries were inserted.
//
// Initialized instances of hashtable must not be copied: after init,
// tailLink may hold the address of the head field and table may be a
// slice of bucket0, so a copy would still refer to the original's memory.
type hashtable struct {
	table     []bucket  // len is zero or a power of two
	bucket0   [1]bucket // inline allocation for small maps.
	len       uint32    // number of entries currently in the table
	itercount uint32    // number of active iterators (ignored if frozen)
	head      *entry    // insertion order doubly-linked list; may be nil
	tailLink  **entry   // address of nil link at end of list (perhaps &head)
	frozen    bool      // set by freeze; checkMutable rejects mutation once true

	_ noCopy // triggers vet copylock check on this type.
}
    29  
// noCopy is a zero-sized type that triggers vet's copylock check
// when embedded as a field of another struct.
// See https://github.com/golang/go/issues/8005#issuecomment-190753527.
type noCopy struct{}

// Lock is a no-op; it exists only to give *noCopy a Lock method.
func (*noCopy) Lock() {}

// Unlock is a no-op; it exists only to give *noCopy an Unlock method.
func (*noCopy) Unlock() {}
    36  
// bucketSize is the number of entries stored inline in each bucket.
const bucketSize = 8

// bucket is a fixed-size group of entries. Buckets whose keys map to the
// same table slot are chained through next; insert appends a fresh bucket
// to the chain when none of the existing ones has a free entry.
type bucket struct {
	entries [bucketSize]entry
	next    *bucket // linked list of buckets
}
    43  
// entry is one key/value slot of a bucket. A slot is unused while its
// hash is zero; used slots are additionally linked together in the order
// in which they were inserted.
type entry struct {
	hash       uint32 // nonzero => in use
	key, value Value
	next       *entry  // insertion order doubly-linked list; may be nil
	prevLink   **entry // address of link to this entry (perhaps &head)
}
    50  
    51  func (ht *hashtable) init(size int) {
    52  	if size < 0 {
    53  		panic("size < 0")
    54  	}
    55  	nb := 1
    56  	for overloaded(size, nb) {
    57  		nb = nb << 1
    58  	}
    59  	if nb < 2 {
    60  		ht.table = ht.bucket0[:1]
    61  	} else {
    62  		ht.table = make([]bucket, nb)
    63  	}
    64  	ht.tailLink = &ht.head
    65  }
    66  
    67  func (ht *hashtable) freeze() {
    68  	if !ht.frozen {
    69  		ht.frozen = true
    70  		for e := ht.head; e != nil; e = e.next {
    71  			e.key.Freeze()
    72  			e.value.Freeze()
    73  		}
    74  	}
    75  }
    76  
    77  func (ht *hashtable) insert(k, v Value) error {
    78  	if err := ht.checkMutable("insert into"); err != nil {
    79  		return err
    80  	}
    81  	if ht.table == nil {
    82  		ht.init(1)
    83  	}
    84  	h, err := k.Hash()
    85  	if err != nil {
    86  		return err
    87  	}
    88  	if h == 0 {
    89  		h = 1 // zero is reserved
    90  	}
    91  
    92  retry:
    93  	var insert *entry
    94  
    95  	// Inspect each bucket in the bucket list.
    96  	p := &ht.table[h&(uint32(len(ht.table)-1))]
    97  	for {
    98  		for i := range p.entries {
    99  			e := &p.entries[i]
   100  			if e.hash != h {
   101  				if e.hash == 0 {
   102  					// Found empty entry; make a note.
   103  					insert = e
   104  				}
   105  				continue
   106  			}
   107  			if eq, err := Equal(k, e.key); err != nil {
   108  				return err // e.g. excessively recursive tuple
   109  			} else if !eq {
   110  				continue
   111  			}
   112  			// Key already present; update value.
   113  			e.value = v
   114  			return nil
   115  		}
   116  		if p.next == nil {
   117  			break
   118  		}
   119  		p = p.next
   120  	}
   121  
   122  	// Key not found.  p points to the last bucket.
   123  
   124  	// Does the number of elements exceed the buckets' load factor?
   125  	if overloaded(int(ht.len), len(ht.table)) {
   126  		ht.grow()
   127  		goto retry
   128  	}
   129  
   130  	if insert == nil {
   131  		// No space in existing buckets.  Add a new one to the bucket list.
   132  		b := new(bucket)
   133  		p.next = b
   134  		insert = &b.entries[0]
   135  	}
   136  
   137  	// Insert key/value pair.
   138  	insert.hash = h
   139  	insert.key = k
   140  	insert.value = v
   141  
   142  	// Append entry to doubly-linked list.
   143  	insert.prevLink = ht.tailLink
   144  	*ht.tailLink = insert
   145  	ht.tailLink = &insert.next
   146  
   147  	ht.len++
   148  
   149  	return nil
   150  }
   151  
   152  func overloaded(elems, buckets int) bool {
   153  	const loadFactor = 6.5 // just a guess
   154  	return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets)
   155  }
   156  
   157  func (ht *hashtable) grow() {
   158  	// Double the number of buckets and rehash.
   159  	//
   160  	// Even though this makes reentrant calls to ht.insert,
   161  	// calls Equals unnecessarily (since there can't be duplicate keys),
   162  	// and recomputes the hash unnecessarily, the gains from
   163  	// avoiding these steps were found to be too small to justify
   164  	// the extra logic: -2% on hashtable benchmark.
   165  	ht.table = make([]bucket, len(ht.table)<<1)
   166  	oldhead := ht.head
   167  	ht.head = nil
   168  	ht.tailLink = &ht.head
   169  	ht.len = 0
   170  	for e := oldhead; e != nil; e = e.next {
   171  		ht.insert(e.key, e.value)
   172  	}
   173  	ht.bucket0[0] = bucket{} // clear out unused initial bucket
   174  }
   175  
   176  func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) {
   177  	h, err := k.Hash()
   178  	if err != nil {
   179  		return nil, false, err // unhashable
   180  	}
   181  	if h == 0 {
   182  		h = 1 // zero is reserved
   183  	}
   184  	if ht.table == nil {
   185  		return None, false, nil // empty
   186  	}
   187  
   188  	// Inspect each bucket in the bucket list.
   189  	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
   190  		for i := range p.entries {
   191  			e := &p.entries[i]
   192  			if e.hash == h {
   193  				if eq, err := Equal(k, e.key); err != nil {
   194  					return nil, false, err // e.g. excessively recursive tuple
   195  				} else if eq {
   196  					return e.value, true, nil // found
   197  				}
   198  			}
   199  		}
   200  	}
   201  	return None, false, nil // not found
   202  }
   203  
   204  // count returns the number of distinct elements of iter that are elements of ht.
   205  func (ht *hashtable) count(iter Iterator) (int, error) {
   206  	if ht.table == nil {
   207  		return 0, nil // empty
   208  	}
   209  
   210  	var k Value
   211  	count := 0
   212  
   213  	// Use a bitset per table entry to record seen elements of ht.
   214  	// Elements are identified by their bucket number and index within the bucket.
   215  	// Each bitset gets one word initially, but may grow.
   216  	storage := make([]big.Word, len(ht.table))
   217  	bitsets := make([]big.Int, len(ht.table))
   218  	for i := range bitsets {
   219  		bitsets[i].SetBits(storage[i : i+1 : i+1])
   220  	}
   221  	for iter.Next(&k) && count != int(ht.len) {
   222  		h, err := k.Hash()
   223  		if err != nil {
   224  			return 0, err // unhashable
   225  		}
   226  		if h == 0 {
   227  			h = 1 // zero is reserved
   228  		}
   229  
   230  		// Inspect each bucket in the bucket list.
   231  		bucketId := h & (uint32(len(ht.table) - 1))
   232  		i := 0
   233  		for p := &ht.table[bucketId]; p != nil; p = p.next {
   234  			for j := range p.entries {
   235  				e := &p.entries[j]
   236  				if e.hash == h {
   237  					if eq, err := Equal(k, e.key); err != nil {
   238  						return 0, err
   239  					} else if eq {
   240  						bitIndex := i<<3 + j
   241  						if bitsets[bucketId].Bit(bitIndex) == 0 {
   242  							bitsets[bucketId].SetBit(&bitsets[bucketId], bitIndex, 1)
   243  							count++
   244  						}
   245  					}
   246  				}
   247  			}
   248  			i++
   249  		}
   250  	}
   251  
   252  	return count, nil
   253  }
   254  
   255  // Items returns all the items in the map (as key/value pairs) in insertion order.
   256  func (ht *hashtable) items() []Tuple {
   257  	items := make([]Tuple, 0, ht.len)
   258  	array := make([]Value, ht.len*2) // allocate a single backing array
   259  	for e := ht.head; e != nil; e = e.next {
   260  		pair := Tuple(array[:2:2])
   261  		array = array[2:]
   262  		pair[0] = e.key
   263  		pair[1] = e.value
   264  		items = append(items, pair)
   265  	}
   266  	return items
   267  }
   268  
   269  func (ht *hashtable) first() (Value, bool) {
   270  	if ht.head != nil {
   271  		return ht.head.key, true
   272  	}
   273  	return None, false
   274  }
   275  
   276  func (ht *hashtable) keys() []Value {
   277  	keys := make([]Value, 0, ht.len)
   278  	for e := ht.head; e != nil; e = e.next {
   279  		keys = append(keys, e.key)
   280  	}
   281  	return keys
   282  }
   283  
   284  func (ht *hashtable) delete(k Value) (v Value, found bool, err error) {
   285  	if err := ht.checkMutable("delete from"); err != nil {
   286  		return nil, false, err
   287  	}
   288  	if ht.table == nil {
   289  		return None, false, nil // empty
   290  	}
   291  	h, err := k.Hash()
   292  	if err != nil {
   293  		return nil, false, err // unhashable
   294  	}
   295  	if h == 0 {
   296  		h = 1 // zero is reserved
   297  	}
   298  
   299  	// Inspect each bucket in the bucket list.
   300  	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
   301  		for i := range p.entries {
   302  			e := &p.entries[i]
   303  			if e.hash == h {
   304  				if eq, err := Equal(k, e.key); err != nil {
   305  					return nil, false, err
   306  				} else if eq {
   307  					// Remove e from doubly-linked list.
   308  					*e.prevLink = e.next
   309  					if e.next == nil {
   310  						ht.tailLink = e.prevLink // deletion of last entry
   311  					} else {
   312  						e.next.prevLink = e.prevLink
   313  					}
   314  
   315  					v := e.value
   316  					*e = entry{}
   317  					ht.len--
   318  					return v, true, nil // found
   319  				}
   320  			}
   321  		}
   322  	}
   323  
   324  	// TODO(adonovan): opt: remove completely empty bucket from bucket list.
   325  
   326  	return None, false, nil // not found
   327  }
   328  
   329  // checkMutable reports an error if the hash table should not be mutated.
   330  // verb+" dict" should describe the operation.
   331  func (ht *hashtable) checkMutable(verb string) error {
   332  	if ht.frozen {
   333  		return fmt.Errorf("cannot %s frozen hash table", verb)
   334  	}
   335  	if ht.itercount > 0 {
   336  		return fmt.Errorf("cannot %s hash table during iteration", verb)
   337  	}
   338  	return nil
   339  }
   340  
   341  func (ht *hashtable) clear() error {
   342  	if err := ht.checkMutable("clear"); err != nil {
   343  		return err
   344  	}
   345  	if ht.table != nil {
   346  		for i := range ht.table {
   347  			ht.table[i] = bucket{}
   348  		}
   349  	}
   350  	ht.head = nil
   351  	ht.tailLink = &ht.head
   352  	ht.len = 0
   353  	return nil
   354  }
   355  
   356  func (ht *hashtable) addAll(other *hashtable) error {
   357  	for e := other.head; e != nil; e = e.next {
   358  		if err := ht.insert(e.key, e.value); err != nil {
   359  			return err
   360  		}
   361  	}
   362  	return nil
   363  }
   364  
   365  // dump is provided as an aid to debugging.
   366  func (ht *hashtable) dump() {
   367  	fmt.Printf("hashtable %p len=%d head=%p tailLink=%p",
   368  		ht, ht.len, ht.head, ht.tailLink)
   369  	if ht.tailLink != nil {
   370  		fmt.Printf(" *tailLink=%p", *ht.tailLink)
   371  	}
   372  	fmt.Println()
   373  	for j := range ht.table {
   374  		fmt.Printf("bucket chain %d\n", j)
   375  		for p := &ht.table[j]; p != nil; p = p.next {
   376  			fmt.Printf("bucket %p\n", p)
   377  			for i := range p.entries {
   378  				e := &p.entries[i]
   379  				fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n",
   380  					i, e, e.hash, e.key, e.value)
   381  				fmt.Printf("\t\tnext=%p &next=%p prev=%p",
   382  					e.next, &e.next, e.prevLink)
   383  				if e.prevLink != nil {
   384  					fmt.Printf(" *prev=%p", *e.prevLink)
   385  				}
   386  				fmt.Println()
   387  			}
   388  		}
   389  	}
   390  }
   391  
   392  func (ht *hashtable) iterate() *keyIterator {
   393  	if !ht.frozen {
   394  		ht.itercount++
   395  	}
   396  	return &keyIterator{ht: ht, e: ht.head}
   397  }
   398  
// keyIterator iterates over the keys of a hashtable in insertion order.
// Instances are created by (*hashtable).iterate.
type keyIterator struct {
	ht *hashtable // table being iterated
	e  *entry     // next entry to yield; nil once iteration is exhausted
}
   403  
   404  func (it *keyIterator) Next(k *Value) bool {
   405  	if it.e != nil {
   406  		*k = it.e.key
   407  		it.e = it.e.next
   408  		return true
   409  	}
   410  	return false
   411  }
   412  
   413  func (it *keyIterator) Done() {
   414  	if !it.ht.frozen {
   415  		it.ht.itercount--
   416  	}
   417  }
   418  
   419  // TODO(adonovan): use go1.19's maphash.String.
   420  
   421  // hashString computes the hash of s.
   422  func hashString(s string) uint32 {
   423  	if len(s) >= 12 {
   424  		// Call the Go runtime's optimized hash implementation,
   425  		// which uses the AESENC instruction on amd64 machines.
   426  		return uint32(goStringHash(s, 0))
   427  	}
   428  	return softHashString(s)
   429  }
   430  
// goStringHash hashes s starting from the given seed. It has no body
// here: the go:linkname directive below binds it to runtime.stringHash,
// which is why this file imports package unsafe.
//
//go:linkname goStringHash runtime.stringHash
func goStringHash(s string, seed uintptr) uintptr
   433  
   434  // softHashString computes the 32-bit FNV-1a hash of s in software.
   435  func softHashString(s string) uint32 {
   436  	var h uint32 = 2166136261
   437  	for i := 0; i < len(s); i++ {
   438  		h ^= uint32(s[i])
   439  		h *= 16777619
   440  	}
   441  	return h
   442  }