github.com/lab47/exprcore@v0.0.0-20210525052339-fb7d6bd9331e/exprcore/hashtable.go (about) 1 // Copyright 2017 The Bazel Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package exprcore 6 7 import ( 8 "fmt" 9 _ "unsafe" // for go:linkname hack 10 ) 11 12 // hashtable is used to represent exprcore dict and set values. 13 // It is a hash table whose key/value entries form a doubly-linked list 14 // in the order the entries were inserted. 15 type hashtable struct { 16 table []bucket // len is zero or a power of two 17 bucket0 [1]bucket // inline allocation for small maps. 18 len uint32 19 itercount uint32 // number of active iterators (ignored if frozen) 20 head *entry // insertion order doubly-linked list; may be nil 21 tailLink **entry // address of nil link at end of list (perhaps &head) 22 frozen bool 23 } 24 25 const bucketSize = 8 26 27 type bucket struct { 28 entries [bucketSize]entry 29 next *bucket // linked list of buckets 30 } 31 32 type entry struct { 33 hash uint32 // nonzero => in use 34 key, value Value 35 next *entry // insertion order doubly-linked list; may be nil 36 prevLink **entry // address of link to this entry (perhaps &head) 37 } 38 39 func (ht *hashtable) init(size int) { 40 if size < 0 { 41 panic("size < 0") 42 } 43 nb := 1 44 for overloaded(size, nb) { 45 nb = nb << 1 46 } 47 if nb < 2 { 48 ht.table = ht.bucket0[:1] 49 } else { 50 ht.table = make([]bucket, nb) 51 } 52 ht.tailLink = &ht.head 53 } 54 55 func (ht *hashtable) freeze() { 56 if !ht.frozen { 57 ht.frozen = true 58 for i := range ht.table { 59 for p := &ht.table[i]; p != nil; p = p.next { 60 for i := range p.entries { 61 e := &p.entries[i] 62 if e.hash != 0 { 63 e.key.Freeze() 64 e.value.Freeze() 65 } 66 } 67 } 68 } 69 } 70 } 71 72 func (ht *hashtable) insert(k, v Value) error { 73 if ht.frozen { 74 return fmt.Errorf("cannot insert into frozen hash table") 75 } 76 if ht.itercount > 0 { 77 return fmt.Errorf("cannot insert into hash table during iteration") 78 } 79 if ht.table == nil { 80 ht.init(1) 81 } 82 h, err := k.Hash() 83 if err != nil { 84 return err 85 } 86 if h == 0 { 87 h = 1 // zero is reserved 88 } 89 90 retry: 91 var insert *entry 92 93 // Inspect each bucket in the bucket list. 94 p := &ht.table[h&(uint32(len(ht.table)-1))] 95 for { 96 for i := range p.entries { 97 e := &p.entries[i] 98 if e.hash != h { 99 if e.hash == 0 { 100 // Found empty entry; make a note. 101 insert = e 102 } 103 continue 104 } 105 if eq, err := Equal(k, e.key); err != nil { 106 return err // e.g. excessively recursive tuple 107 } else if !eq { 108 continue 109 } 110 // Key already present; update value. 111 e.value = v 112 return nil 113 } 114 if p.next == nil { 115 break 116 } 117 p = p.next 118 } 119 120 // Key not found. p points to the last bucket. 121 122 // Does the number of elements exceed the buckets' load factor? 123 if overloaded(int(ht.len), len(ht.table)) { 124 ht.grow() 125 goto retry 126 } 127 128 if insert == nil { 129 // No space in existing buckets. Add a new one to the bucket list. 130 b := new(bucket) 131 p.next = b 132 insert = &b.entries[0] 133 } 134 135 // Insert key/value pair. 136 insert.hash = h 137 insert.key = k 138 insert.value = v 139 140 // Append entry to doubly-linked list. 141 insert.prevLink = ht.tailLink 142 *ht.tailLink = insert 143 ht.tailLink = &insert.next 144 145 ht.len++ 146 147 return nil 148 } 149 150 func overloaded(elems, buckets int) bool { 151 const loadFactor = 6.5 // just a guess 152 return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets) 153 } 154 155 func (ht *hashtable) grow() { 156 // Double the number of buckets and rehash. 157 // TODO(adonovan): opt: 158 // - avoid reentrant calls to ht.insert, and specialize it. 159 // e.g. we know the calls to Equals will return false since 160 // there are no duplicates among the old keys. 161 // - saving the entire hash in the bucket would avoid the need to 162 // recompute the hash. 163 // - save the old buckets on a free list. 164 ht.table = make([]bucket, len(ht.table)<<1) 165 oldhead := ht.head 166 ht.head = nil 167 ht.tailLink = &ht.head 168 ht.len = 0 169 for e := oldhead; e != nil; e = e.next { 170 ht.insert(e.key, e.value) 171 } 172 ht.bucket0[0] = bucket{} // clear out unused initial bucket 173 } 174 175 func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) { 176 h, err := k.Hash() 177 if err != nil { 178 return nil, false, err // unhashable 179 } 180 if h == 0 { 181 h = 1 // zero is reserved 182 } 183 if ht.table == nil { 184 return None, false, nil // empty 185 } 186 187 // Inspect each bucket in the bucket list. 188 for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { 189 for i := range p.entries { 190 e := &p.entries[i] 191 if e.hash == h { 192 if eq, err := Equal(k, e.key); err != nil { 193 return nil, false, err // e.g. excessively recursive tuple 194 } else if eq { 195 return e.value, true, nil // found 196 } 197 } 198 } 199 } 200 return None, false, nil // not found 201 } 202 203 // Items returns all the items in the map (as key/value pairs) in insertion order. 204 func (ht *hashtable) items() []Tuple { 205 items := make([]Tuple, 0, ht.len) 206 array := make([]Value, ht.len*2) // allocate a single backing array 207 for e := ht.head; e != nil; e = e.next { 208 pair := Tuple(array[:2:2]) 209 array = array[2:] 210 pair[0] = e.key 211 pair[1] = e.value 212 items = append(items, pair) 213 } 214 return items 215 } 216 217 func (ht *hashtable) first() (Value, bool) { 218 if ht.head != nil { 219 return ht.head.key, true 220 } 221 return None, false 222 } 223 224 func (ht *hashtable) keys() []Value { 225 keys := make([]Value, 0, ht.len) 226 for e := ht.head; e != nil; e = e.next { 227 keys = append(keys, e.key) 228 } 229 return keys 230 } 231 232 func (ht *hashtable) values() []Value { 233 values := make([]Value, 0, ht.len) 234 for e := ht.head; e != nil; e = e.next { 235 values = append(values, e.value) 236 } 237 return values 238 } 239 240 func (ht *hashtable) delete(k Value) (v Value, found bool, err error) { 241 if ht.frozen { 242 return nil, false, fmt.Errorf("cannot delete from frozen hash table") 243 } 244 if ht.itercount > 0 { 245 return nil, false, fmt.Errorf("cannot delete from hash table during iteration") 246 } 247 if ht.table == nil { 248 return None, false, nil // empty 249 } 250 h, err := k.Hash() 251 if err != nil { 252 return nil, false, err // unhashable 253 } 254 if h == 0 { 255 h = 1 // zero is reserved 256 } 257 258 // Inspect each bucket in the bucket list. 259 for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { 260 for i := range p.entries { 261 e := &p.entries[i] 262 if e.hash == h { 263 if eq, err := Equal(k, e.key); err != nil { 264 return nil, false, err 265 } else if eq { 266 // Remove e from doubly-linked list. 267 *e.prevLink = e.next 268 if e.next == nil { 269 ht.tailLink = e.prevLink // deletion of last entry 270 } else { 271 e.next.prevLink = e.prevLink 272 } 273 274 v := e.value 275 *e = entry{} 276 ht.len-- 277 return v, true, nil // found 278 } 279 } 280 } 281 } 282 283 // TODO(adonovan): opt: remove completely empty bucket from bucket list. 284 285 return None, false, nil // not found 286 } 287 288 func (ht *hashtable) clear() error { 289 if ht.frozen { 290 return fmt.Errorf("cannot clear frozen hash table") 291 } 292 if ht.itercount > 0 { 293 return fmt.Errorf("cannot clear hash table during iteration") 294 } 295 if ht.table != nil { 296 for i := range ht.table { 297 ht.table[i] = bucket{} 298 } 299 } 300 ht.head = nil 301 ht.tailLink = &ht.head 302 ht.len = 0 303 return nil 304 } 305 306 // dump is provided as an aid to debugging. 307 func (ht *hashtable) dump() { 308 fmt.Printf("hashtable %p len=%d head=%p tailLink=%p", 309 ht, ht.len, ht.head, ht.tailLink) 310 if ht.tailLink != nil { 311 fmt.Printf(" *tailLink=%p", *ht.tailLink) 312 } 313 fmt.Println() 314 for j := range ht.table { 315 fmt.Printf("bucket chain %d\n", j) 316 for p := &ht.table[j]; p != nil; p = p.next { 317 fmt.Printf("bucket %p\n", p) 318 for i := range p.entries { 319 e := &p.entries[i] 320 fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n", 321 i, e, e.hash, e.key, e.value) 322 fmt.Printf("\t\tnext=%p &next=%p prev=%p", 323 e.next, &e.next, e.prevLink) 324 if e.prevLink != nil { 325 fmt.Printf(" *prev=%p", *e.prevLink) 326 } 327 fmt.Println() 328 } 329 } 330 } 331 } 332 333 func (ht *hashtable) iterate() *keyIterator { 334 if !ht.frozen { 335 ht.itercount++ 336 } 337 return &keyIterator{ht: ht, e: ht.head} 338 } 339 340 type keyIterator struct { 341 ht *hashtable 342 e *entry 343 } 344 345 func (it *keyIterator) Next(k *Value) bool { 346 if it.e != nil { 347 *k = it.e.key 348 it.e = it.e.next 349 return true 350 } 351 return false 352 } 353 354 func (it *keyIterator) Done() { 355 if !it.ht.frozen { 356 it.ht.itercount-- 357 } 358 } 359 360 // hashString computes the hash of s. 361 func hashString(s string) uint32 { 362 if len(s) >= 12 { 363 // Call the Go runtime's optimized hash implementation, 364 // which uses the AESENC instruction on amd64 machines. 365 return uint32(goStringHash(s, 0)) 366 } 367 return softHashString(s) 368 } 369 370 //go:linkname goStringHash runtime.stringHash 371 func goStringHash(s string, seed uintptr) uintptr 372 373 // softHashString computes the FNV hash of s in software. 374 func softHashString(s string) uint32 { 375 var h uint32 376 for i := 0; i < len(s); i++ { 377 h ^= uint32(s[i]) 378 h *= 16777619 379 } 380 return h 381 }