github.com/k14s/starlark-go@v0.0.0-20200720175618-3a5c849cc368/starlark/hashtable.go (about) 1 // Copyright 2017 The Bazel Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package starlark 6 7 import ( 8 "fmt" 9 _ "unsafe" // for go:linkname hack 10 ) 11 12 // hashtable is used to represent Starlark dict and set values. 13 // It is a hash table whose key/value entries form a doubly-linked list 14 // in the order the entries were inserted. 15 type hashtable struct { 16 table []bucket // len is zero or a power of two 17 bucket0 [1]bucket // inline allocation for small maps. 18 len uint32 19 itercount uint32 // number of active iterators (ignored if frozen) 20 head *entry // insertion order doubly-linked list; may be nil 21 tailLink **entry // address of nil link at end of list (perhaps &head) 22 frozen bool 23 } 24 25 const bucketSize = 8 26 27 type bucket struct { 28 entries [bucketSize]entry 29 next *bucket // linked list of buckets 30 } 31 32 type entry struct { 33 hash uint32 // nonzero => in use 34 key, value Value 35 next *entry // insertion order doubly-linked list; may be nil 36 prevLink **entry // address of link to this entry (perhaps &head) 37 } 38 39 func (ht *hashtable) init(size int) { 40 if size < 0 { 41 panic("size < 0") 42 } 43 nb := 1 44 for overloaded(size, nb) { 45 nb = nb << 1 46 } 47 if nb < 2 { 48 ht.table = ht.bucket0[:1] 49 } else { 50 ht.table = make([]bucket, nb) 51 } 52 ht.tailLink = &ht.head 53 } 54 55 func (ht *hashtable) freeze() { 56 if !ht.frozen { 57 ht.frozen = true 58 for i := range ht.table { 59 for p := &ht.table[i]; p != nil; p = p.next { 60 for i := range p.entries { 61 e := &p.entries[i] 62 if e.hash != 0 { 63 e.key.Freeze() 64 e.value.Freeze() 65 } 66 } 67 } 68 } 69 } 70 } 71 72 func (ht *hashtable) insert(k, v Value) error { 73 if ht.frozen { 74 return fmt.Errorf("cannot insert into frozen hash table") 75 } 76 if ht.itercount > 0 { 77 return fmt.Errorf("cannot insert into hash table during iteration") 78 } 79 if ht.table == nil { 80 ht.init(1) 81 } 82 h, err := k.Hash() 83 if err != nil { 84 return err 85 } 86 if h == 0 { 87 h = 1 // zero is reserved 88 } 89 90 retry: 91 var insert *entry 92 93 // Inspect each bucket in the bucket list. 94 p := &ht.table[h&(uint32(len(ht.table)-1))] 95 for { 96 for i := range p.entries { 97 e := &p.entries[i] 98 if e.hash != h { 99 if e.hash == 0 { 100 // Found empty entry; make a note. 101 insert = e 102 } 103 continue 104 } 105 if eq, err := Equal(k, e.key); err != nil { 106 return err // e.g. excessively recursive tuple 107 } else if !eq { 108 continue 109 } 110 // Key already present; update value. 111 e.value = v 112 return nil 113 } 114 if p.next == nil { 115 break 116 } 117 p = p.next 118 } 119 120 // Key not found. p points to the last bucket. 121 122 // Does the number of elements exceed the buckets' load factor? 123 if overloaded(int(ht.len), len(ht.table)) { 124 ht.grow() 125 goto retry 126 } 127 128 if insert == nil { 129 // No space in existing buckets. Add a new one to the bucket list. 130 b := new(bucket) 131 p.next = b 132 insert = &b.entries[0] 133 } 134 135 // Insert key/value pair. 136 insert.hash = h 137 insert.key = k 138 insert.value = v 139 140 // Append entry to doubly-linked list. 141 insert.prevLink = ht.tailLink 142 *ht.tailLink = insert 143 ht.tailLink = &insert.next 144 145 ht.len++ 146 147 return nil 148 } 149 150 func overloaded(elems, buckets int) bool { 151 const loadFactor = 6.5 // just a guess 152 return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets) 153 } 154 155 func (ht *hashtable) grow() { 156 // Double the number of buckets and rehash. 157 // TODO(adonovan): opt: 158 // - avoid reentrant calls to ht.insert, and specialize it. 159 // e.g. we know the calls to Equals will return false since 160 // there are no duplicates among the old keys. 161 // - saving the entire hash in the bucket would avoid the need to 162 // recompute the hash. 163 // - save the old buckets on a free list. 164 ht.table = make([]bucket, len(ht.table)<<1) 165 oldhead := ht.head 166 ht.head = nil 167 ht.tailLink = &ht.head 168 ht.len = 0 169 for e := oldhead; e != nil; e = e.next { 170 ht.insert(e.key, e.value) 171 } 172 ht.bucket0[0] = bucket{} // clear out unused initial bucket 173 } 174 175 func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) { 176 h, err := k.Hash() 177 if err != nil { 178 return nil, false, err // unhashable 179 } 180 if h == 0 { 181 h = 1 // zero is reserved 182 } 183 if ht.table == nil { 184 return None, false, nil // empty 185 } 186 187 // Inspect each bucket in the bucket list. 188 for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { 189 for i := range p.entries { 190 e := &p.entries[i] 191 if e.hash == h { 192 if eq, err := Equal(k, e.key); err != nil { 193 return nil, false, err // e.g. excessively recursive tuple 194 } else if eq { 195 return e.value, true, nil // found 196 } 197 } 198 } 199 } 200 return None, false, nil // not found 201 } 202 203 // Items returns all the items in the map (as key/value pairs) in insertion order. 204 func (ht *hashtable) items() []Tuple { 205 items := make([]Tuple, 0, ht.len) 206 array := make([]Value, ht.len*2) // allocate a single backing array 207 for e := ht.head; e != nil; e = e.next { 208 pair := Tuple(array[:2:2]) 209 array = array[2:] 210 pair[0] = e.key 211 pair[1] = e.value 212 items = append(items, pair) 213 } 214 return items 215 } 216 217 func (ht *hashtable) first() (Value, bool) { 218 if ht.head != nil { 219 return ht.head.key, true 220 } 221 return None, false 222 } 223 224 func (ht *hashtable) keys() []Value { 225 keys := make([]Value, 0, ht.len) 226 for e := ht.head; e != nil; e = e.next { 227 keys = append(keys, e.key) 228 } 229 return keys 230 } 231 232 func (ht *hashtable) delete(k Value) (v Value, found bool, err error) { 233 if ht.frozen { 234 return nil, false, fmt.Errorf("cannot delete from frozen hash table") 235 } 236 if ht.itercount > 0 { 237 return nil, false, fmt.Errorf("cannot delete from hash table during iteration") 238 } 239 if ht.table == nil { 240 return None, false, nil // empty 241 } 242 h, err := k.Hash() 243 if err != nil { 244 return nil, false, err // unhashable 245 } 246 if h == 0 { 247 h = 1 // zero is reserved 248 } 249 250 // Inspect each bucket in the bucket list. 251 for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { 252 for i := range p.entries { 253 e := &p.entries[i] 254 if e.hash == h { 255 if eq, err := Equal(k, e.key); err != nil { 256 return nil, false, err 257 } else if eq { 258 // Remove e from doubly-linked list. 259 *e.prevLink = e.next 260 if e.next == nil { 261 ht.tailLink = e.prevLink // deletion of last entry 262 } else { 263 e.next.prevLink = e.prevLink 264 } 265 266 v := e.value 267 *e = entry{} 268 ht.len-- 269 return v, true, nil // found 270 } 271 } 272 } 273 } 274 275 // TODO(adonovan): opt: remove completely empty bucket from bucket list. 276 277 return None, false, nil // not found 278 } 279 280 func (ht *hashtable) clear() error { 281 if ht.frozen { 282 return fmt.Errorf("cannot clear frozen hash table") 283 } 284 if ht.itercount > 0 { 285 return fmt.Errorf("cannot clear hash table during iteration") 286 } 287 if ht.table != nil { 288 for i := range ht.table { 289 ht.table[i] = bucket{} 290 } 291 } 292 ht.head = nil 293 ht.tailLink = &ht.head 294 ht.len = 0 295 return nil 296 } 297 298 // dump is provided as an aid to debugging. 299 func (ht *hashtable) dump() { 300 fmt.Printf("hashtable %p len=%d head=%p tailLink=%p", 301 ht, ht.len, ht.head, ht.tailLink) 302 if ht.tailLink != nil { 303 fmt.Printf(" *tailLink=%p", *ht.tailLink) 304 } 305 fmt.Println() 306 for j := range ht.table { 307 fmt.Printf("bucket chain %d\n", j) 308 for p := &ht.table[j]; p != nil; p = p.next { 309 fmt.Printf("bucket %p\n", p) 310 for i := range p.entries { 311 e := &p.entries[i] 312 fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n", 313 i, e, e.hash, e.key, e.value) 314 fmt.Printf("\t\tnext=%p &next=%p prev=%p", 315 e.next, &e.next, e.prevLink) 316 if e.prevLink != nil { 317 fmt.Printf(" *prev=%p", *e.prevLink) 318 } 319 fmt.Println() 320 } 321 } 322 } 323 } 324 325 func (ht *hashtable) iterate() *keyIterator { 326 if !ht.frozen { 327 ht.itercount++ 328 } 329 return &keyIterator{ht: ht, e: ht.head} 330 } 331 332 type keyIterator struct { 333 ht *hashtable 334 e *entry 335 } 336 337 func (it *keyIterator) Next(k *Value) bool { 338 if it.e != nil { 339 *k = it.e.key 340 it.e = it.e.next 341 return true 342 } 343 return false 344 } 345 346 func (it *keyIterator) Done() { 347 if !it.ht.frozen { 348 it.ht.itercount-- 349 } 350 } 351 352 // hashString computes the hash of s. 353 func hashString(s string) uint32 { 354 if len(s) >= 12 { 355 // Call the Go runtime's optimized hash implementation, 356 // which uses the AESENC instruction on amd64 machines. 357 return uint32(goStringHash(s, 0)) 358 } 359 return softHashString(s) 360 } 361 362 //go:linkname goStringHash runtime.stringHash 363 func goStringHash(s string, seed uintptr) uintptr 364 365 // softHashString computes the FNV hash of s in software. 366 func softHashString(s string) uint32 { 367 var h uint32 368 for i := 0; i < len(s); i++ { 369 h ^= uint32(s[i]) 370 h *= 16777619 371 } 372 return h 373 }