// Origin: go.starlark.net@v0.0.0-20231101134539-556fd59b42f6/starlark/hashtable.go

// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package starlark

import (
	"fmt"
	"math/big"
	_ "unsafe" // for go:linkname hack
)

// hashtable is used to represent Starlark dict and set values.
// It is a hash table whose key/value entries form a doubly-linked list
// in the order the entries were inserted.
//
// Each slot of table is the head of a chain of fixed-size buckets;
// a key's chain is selected by masking its hash with len(table)-1.
//
// Initialized instances of hashtable must not be copied.
type hashtable struct {
	table     []bucket  // bucket chains indexed by hash; len is zero or a power of two
	bucket0   [1]bucket // inline allocation for small maps.
	len       uint32    // number of entries currently in the table
	itercount uint32    // number of active iterators (ignored if frozen)
	head      *entry    // insertion order doubly-linked list; may be nil
	tailLink  **entry   // address of nil link at end of list (perhaps &head)
	frozen    bool      // set by freeze; checkMutable then rejects mutation

	_ noCopy // triggers vet copylock check on this type.
}

// noCopy is a zero-sized type that triggers vet's copylock check.
// See https://github.com/golang/go/issues/8005#issuecomment-190753527.
32 type noCopy struct{} 33 34 func (*noCopy) Lock() {} 35 func (*noCopy) Unlock() {} 36 37 const bucketSize = 8 38 39 type bucket struct { 40 entries [bucketSize]entry 41 next *bucket // linked list of buckets 42 } 43 44 type entry struct { 45 hash uint32 // nonzero => in use 46 key, value Value 47 next *entry // insertion order doubly-linked list; may be nil 48 prevLink **entry // address of link to this entry (perhaps &head) 49 } 50 51 func (ht *hashtable) init(size int) { 52 if size < 0 { 53 panic("size < 0") 54 } 55 nb := 1 56 for overloaded(size, nb) { 57 nb = nb << 1 58 } 59 if nb < 2 { 60 ht.table = ht.bucket0[:1] 61 } else { 62 ht.table = make([]bucket, nb) 63 } 64 ht.tailLink = &ht.head 65 } 66 67 func (ht *hashtable) freeze() { 68 if !ht.frozen { 69 ht.frozen = true 70 for e := ht.head; e != nil; e = e.next { 71 e.key.Freeze() 72 e.value.Freeze() 73 } 74 } 75 } 76 77 func (ht *hashtable) insert(k, v Value) error { 78 if err := ht.checkMutable("insert into"); err != nil { 79 return err 80 } 81 if ht.table == nil { 82 ht.init(1) 83 } 84 h, err := k.Hash() 85 if err != nil { 86 return err 87 } 88 if h == 0 { 89 h = 1 // zero is reserved 90 } 91 92 retry: 93 var insert *entry 94 95 // Inspect each bucket in the bucket list. 96 p := &ht.table[h&(uint32(len(ht.table)-1))] 97 for { 98 for i := range p.entries { 99 e := &p.entries[i] 100 if e.hash != h { 101 if e.hash == 0 { 102 // Found empty entry; make a note. 103 insert = e 104 } 105 continue 106 } 107 if eq, err := Equal(k, e.key); err != nil { 108 return err // e.g. excessively recursive tuple 109 } else if !eq { 110 continue 111 } 112 // Key already present; update value. 113 e.value = v 114 return nil 115 } 116 if p.next == nil { 117 break 118 } 119 p = p.next 120 } 121 122 // Key not found. p points to the last bucket. 123 124 // Does the number of elements exceed the buckets' load factor? 
125 if overloaded(int(ht.len), len(ht.table)) { 126 ht.grow() 127 goto retry 128 } 129 130 if insert == nil { 131 // No space in existing buckets. Add a new one to the bucket list. 132 b := new(bucket) 133 p.next = b 134 insert = &b.entries[0] 135 } 136 137 // Insert key/value pair. 138 insert.hash = h 139 insert.key = k 140 insert.value = v 141 142 // Append entry to doubly-linked list. 143 insert.prevLink = ht.tailLink 144 *ht.tailLink = insert 145 ht.tailLink = &insert.next 146 147 ht.len++ 148 149 return nil 150 } 151 152 func overloaded(elems, buckets int) bool { 153 const loadFactor = 6.5 // just a guess 154 return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets) 155 } 156 157 func (ht *hashtable) grow() { 158 // Double the number of buckets and rehash. 159 // 160 // Even though this makes reentrant calls to ht.insert, 161 // calls Equals unnecessarily (since there can't be duplicate keys), 162 // and recomputes the hash unnecessarily, the gains from 163 // avoiding these steps were found to be too small to justify 164 // the extra logic: -2% on hashtable benchmark. 165 ht.table = make([]bucket, len(ht.table)<<1) 166 oldhead := ht.head 167 ht.head = nil 168 ht.tailLink = &ht.head 169 ht.len = 0 170 for e := oldhead; e != nil; e = e.next { 171 ht.insert(e.key, e.value) 172 } 173 ht.bucket0[0] = bucket{} // clear out unused initial bucket 174 } 175 176 func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) { 177 h, err := k.Hash() 178 if err != nil { 179 return nil, false, err // unhashable 180 } 181 if h == 0 { 182 h = 1 // zero is reserved 183 } 184 if ht.table == nil { 185 return None, false, nil // empty 186 } 187 188 // Inspect each bucket in the bucket list. 189 for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { 190 for i := range p.entries { 191 e := &p.entries[i] 192 if e.hash == h { 193 if eq, err := Equal(k, e.key); err != nil { 194 return nil, false, err // e.g. 
excessively recursive tuple 195 } else if eq { 196 return e.value, true, nil // found 197 } 198 } 199 } 200 } 201 return None, false, nil // not found 202 } 203 204 // count returns the number of distinct elements of iter that are elements of ht. 205 func (ht *hashtable) count(iter Iterator) (int, error) { 206 if ht.table == nil { 207 return 0, nil // empty 208 } 209 210 var k Value 211 count := 0 212 213 // Use a bitset per table entry to record seen elements of ht. 214 // Elements are identified by their bucket number and index within the bucket. 215 // Each bitset gets one word initially, but may grow. 216 storage := make([]big.Word, len(ht.table)) 217 bitsets := make([]big.Int, len(ht.table)) 218 for i := range bitsets { 219 bitsets[i].SetBits(storage[i : i+1 : i+1]) 220 } 221 for iter.Next(&k) && count != int(ht.len) { 222 h, err := k.Hash() 223 if err != nil { 224 return 0, err // unhashable 225 } 226 if h == 0 { 227 h = 1 // zero is reserved 228 } 229 230 // Inspect each bucket in the bucket list. 231 bucketId := h & (uint32(len(ht.table) - 1)) 232 i := 0 233 for p := &ht.table[bucketId]; p != nil; p = p.next { 234 for j := range p.entries { 235 e := &p.entries[j] 236 if e.hash == h { 237 if eq, err := Equal(k, e.key); err != nil { 238 return 0, err 239 } else if eq { 240 bitIndex := i<<3 + j 241 if bitsets[bucketId].Bit(bitIndex) == 0 { 242 bitsets[bucketId].SetBit(&bitsets[bucketId], bitIndex, 1) 243 count++ 244 } 245 } 246 } 247 } 248 i++ 249 } 250 } 251 252 return count, nil 253 } 254 255 // Items returns all the items in the map (as key/value pairs) in insertion order. 
256 func (ht *hashtable) items() []Tuple { 257 items := make([]Tuple, 0, ht.len) 258 array := make([]Value, ht.len*2) // allocate a single backing array 259 for e := ht.head; e != nil; e = e.next { 260 pair := Tuple(array[:2:2]) 261 array = array[2:] 262 pair[0] = e.key 263 pair[1] = e.value 264 items = append(items, pair) 265 } 266 return items 267 } 268 269 func (ht *hashtable) first() (Value, bool) { 270 if ht.head != nil { 271 return ht.head.key, true 272 } 273 return None, false 274 } 275 276 func (ht *hashtable) keys() []Value { 277 keys := make([]Value, 0, ht.len) 278 for e := ht.head; e != nil; e = e.next { 279 keys = append(keys, e.key) 280 } 281 return keys 282 } 283 284 func (ht *hashtable) delete(k Value) (v Value, found bool, err error) { 285 if err := ht.checkMutable("delete from"); err != nil { 286 return nil, false, err 287 } 288 if ht.table == nil { 289 return None, false, nil // empty 290 } 291 h, err := k.Hash() 292 if err != nil { 293 return nil, false, err // unhashable 294 } 295 if h == 0 { 296 h = 1 // zero is reserved 297 } 298 299 // Inspect each bucket in the bucket list. 300 for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { 301 for i := range p.entries { 302 e := &p.entries[i] 303 if e.hash == h { 304 if eq, err := Equal(k, e.key); err != nil { 305 return nil, false, err 306 } else if eq { 307 // Remove e from doubly-linked list. 308 *e.prevLink = e.next 309 if e.next == nil { 310 ht.tailLink = e.prevLink // deletion of last entry 311 } else { 312 e.next.prevLink = e.prevLink 313 } 314 315 v := e.value 316 *e = entry{} 317 ht.len-- 318 return v, true, nil // found 319 } 320 } 321 } 322 } 323 324 // TODO(adonovan): opt: remove completely empty bucket from bucket list. 325 326 return None, false, nil // not found 327 } 328 329 // checkMutable reports an error if the hash table should not be mutated. 330 // verb+" dict" should describe the operation. 
331 func (ht *hashtable) checkMutable(verb string) error { 332 if ht.frozen { 333 return fmt.Errorf("cannot %s frozen hash table", verb) 334 } 335 if ht.itercount > 0 { 336 return fmt.Errorf("cannot %s hash table during iteration", verb) 337 } 338 return nil 339 } 340 341 func (ht *hashtable) clear() error { 342 if err := ht.checkMutable("clear"); err != nil { 343 return err 344 } 345 if ht.table != nil { 346 for i := range ht.table { 347 ht.table[i] = bucket{} 348 } 349 } 350 ht.head = nil 351 ht.tailLink = &ht.head 352 ht.len = 0 353 return nil 354 } 355 356 func (ht *hashtable) addAll(other *hashtable) error { 357 for e := other.head; e != nil; e = e.next { 358 if err := ht.insert(e.key, e.value); err != nil { 359 return err 360 } 361 } 362 return nil 363 } 364 365 // dump is provided as an aid to debugging. 366 func (ht *hashtable) dump() { 367 fmt.Printf("hashtable %p len=%d head=%p tailLink=%p", 368 ht, ht.len, ht.head, ht.tailLink) 369 if ht.tailLink != nil { 370 fmt.Printf(" *tailLink=%p", *ht.tailLink) 371 } 372 fmt.Println() 373 for j := range ht.table { 374 fmt.Printf("bucket chain %d\n", j) 375 for p := &ht.table[j]; p != nil; p = p.next { 376 fmt.Printf("bucket %p\n", p) 377 for i := range p.entries { 378 e := &p.entries[i] 379 fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n", 380 i, e, e.hash, e.key, e.value) 381 fmt.Printf("\t\tnext=%p &next=%p prev=%p", 382 e.next, &e.next, e.prevLink) 383 if e.prevLink != nil { 384 fmt.Printf(" *prev=%p", *e.prevLink) 385 } 386 fmt.Println() 387 } 388 } 389 } 390 } 391 392 func (ht *hashtable) iterate() *keyIterator { 393 if !ht.frozen { 394 ht.itercount++ 395 } 396 return &keyIterator{ht: ht, e: ht.head} 397 } 398 399 type keyIterator struct { 400 ht *hashtable 401 e *entry 402 } 403 404 func (it *keyIterator) Next(k *Value) bool { 405 if it.e != nil { 406 *k = it.e.key 407 it.e = it.e.next 408 return true 409 } 410 return false 411 } 412 413 func (it *keyIterator) Done() { 414 if !it.ht.frozen { 415 
it.ht.itercount-- 416 } 417 } 418 419 // TODO(adonovan): use go1.19's maphash.String. 420 421 // hashString computes the hash of s. 422 func hashString(s string) uint32 { 423 if len(s) >= 12 { 424 // Call the Go runtime's optimized hash implementation, 425 // which uses the AESENC instruction on amd64 machines. 426 return uint32(goStringHash(s, 0)) 427 } 428 return softHashString(s) 429 } 430 431 //go:linkname goStringHash runtime.stringHash 432 func goStringHash(s string, seed uintptr) uintptr 433 434 // softHashString computes the 32-bit FNV-1a hash of s in software. 435 func softHashString(s string) uint32 { 436 var h uint32 = 2166136261 437 for i := 0; i < len(s); i++ { 438 h ^= uint32(s[i]) 439 h *= 16777619 440 } 441 return h 442 }