github.com/thepudds/swisstable@v0.0.0-20221011152303-9c77dc657777/map_test.go (about) 1 package swisstable 2 3 import ( 4 "flag" 5 "fmt" 6 "math" 7 "math/bits" 8 "math/rand" 9 "sort" 10 "testing" 11 12 "github.com/google/go-cmp/cmp" 13 ) 14 15 var longTestFlag = flag.Bool("long", false, "run long benchmarks") 16 var coldMemTestFlag = flag.Float64("coldmem", 512, "memory in MB to use for cold memory tests. should be substantially larger than L3 cache.") 17 18 // TODO: 1000 is probably reasonable 19 var repFlag = flag.Int("rep", 200, "number of repetitions for some tests that are randomized") 20 21 func TestMap_Get(t *testing.T) { 22 tests := []struct { 23 name string 24 keys []Key 25 }{ 26 {"one key", []Key{1}}, 27 {"small, with one grow", list(0, 20, 1)}, 28 {"small, with multiple grows", list(0, 111, 1)}, // from fuzzing 29 } 30 31 for _, tt := range tests { 32 t.Run(fmt.Sprintf(tt.name), func(t *testing.T) { 33 m := New(10) 34 m.hashFunc = identityHash 35 36 for _, k := range tt.keys { 37 m.Set(Key(k), Value(k)) 38 } 39 40 gotLen := m.Len() 41 if gotLen != len(tt.keys) { 42 t.Errorf("Map.Len() = %d, want %d", gotLen, len(tt.keys)) 43 } 44 45 for _, k := range tt.keys { 46 gotV, gotOk := m.Get(k) 47 if gotV != Value(k) || !gotOk { 48 t.Errorf("Map.Get(%v) = %v, %v. want = %v, true", k, gotV, gotOk, k) 49 } 50 } 51 52 notPresent := Key(1e12) 53 gotV, gotOk := m.Get(notPresent) 54 if gotV != 0 || gotOk { 55 t.Errorf("Map.Get(notPresent) = %v, %v. want = 0, false", gotV, gotOk) 56 } 57 }) 58 } 59 } 60 61 func TestMap_Range(t *testing.T) { 62 tests := []struct { 63 name string 64 elems map[Key]Value 65 }{ 66 { 67 "three elements", 68 map[Key]Value{ 69 1: 2, 70 8: 8, 71 1e6: 1e10, 72 }, 73 }, 74 } 75 76 for _, tt := range tests { 77 t.Run(tt.name, func(t *testing.T) { 78 m := New(256) // TODO: confirm this is probably 512 underlying table length? 
79 80 for key, value := range tt.elems { 81 m.Set(key, value) 82 gotV, gotOk := m.Get(key) 83 if !gotOk { 84 t.Errorf("Map.Get() gotOk = %v, want true", gotOk) 85 } 86 if gotV != value { 87 t.Errorf("Map.Get() gotV = %v, want %v", gotV, value) 88 } 89 } 90 got := make(map[Key]Value) 91 m.Range(func(key Key, value Value) bool { 92 // validate we don't see the same key twice 93 _, ok := got[key] 94 if ok { 95 dumpFixedTables(m) 96 t.Errorf("Map.Range() key %v seen before", key) 97 } 98 got[key] = value 99 return true 100 }) 101 // validate our returned key/values match what we put in 102 if diff := cmp.Diff(tt.elems, got); diff != "" { 103 t.Errorf("Map.Range() result mismatch (-want +got):\n%s", diff) 104 } 105 gotLen := m.Len() 106 if gotLen != len(tt.elems) { 107 t.Errorf("Map.Len() gotV = %v, want %v", gotLen, len(tt.elems)) 108 } 109 }) 110 } 111 } 112 113 func TestMap_Delete(t *testing.T) { 114 tests := []struct { 115 name string 116 capacity int 117 disableResizing bool 118 insert int 119 deleteFront int 120 deleteBack int 121 }{ 122 { 123 name: "small, delete one", 124 disableResizing: false, 125 capacity: 256, 126 insert: 2, 127 deleteFront: 1, 128 deleteBack: 0, 129 }, 130 { 131 name: "small, delete one after resizing", 132 disableResizing: false, 133 capacity: 10, 134 insert: 20, // this forces a resize 135 deleteFront: 0, 136 deleteBack: 0, 137 }, 138 { 139 name: "delete ten after resizing", 140 disableResizing: false, 141 capacity: 256, 142 insert: 510, // this forces a resize 143 deleteFront: 0, 144 deleteBack: 10, 145 }, 146 { 147 name: "delete ten force fill", 148 disableResizing: true, 149 capacity: 256, 150 insert: 510, // this is close to full 151 deleteFront: 0, 152 deleteBack: 10, 153 }, 154 { 155 name: "delete all after resizing", 156 disableResizing: false, 157 capacity: 256, 158 insert: 511, // this forces a resize 159 deleteFront: 256, 160 deleteBack: 256, 161 }, 162 { 163 name: "delete all force fill", 164 disableResizing: true, 165 
capacity: 256, 166 insert: 511, // this is a force fill, leaving one empty slot 167 deleteFront: 256, 168 deleteBack: 256, 169 }, 170 } 171 172 for _, tt := range tests { 173 t.Run(tt.name, func(t *testing.T) { 174 m := New(tt.capacity) 175 want := make(map[Key]Value) 176 177 for i := 0; i < tt.insert; i++ { 178 m.Set(Key(i), Value(i)) 179 want[Key(i)] = Value(i) 180 } 181 182 // // Delete a non-existent key 183 m.Delete(-1) 184 delete(want, -1) 185 186 // Delete requested keys 187 for i := 0; i < tt.deleteFront; i++ { 188 m.Delete(Key(i)) 189 delete(want, Key(i)) 190 } 191 for i := tt.insert - tt.deleteBack; i < tt.insert; i++ { 192 m.Delete(Key(i)) 193 delete(want, Key(i)) 194 } 195 196 got := make(map[Key]Value) 197 m.Range(func(key Key, value Value) bool { 198 // validate we don't see the same key twice 199 _, ok := got[key] 200 if ok { 201 t.Errorf("Map.Range() key %v seen twice", key) 202 } 203 got[key] = value 204 return true 205 }) 206 207 if diff := cmp.Diff(want, got); diff != "" { 208 t.Logf("slots: %v", m.current.slots) 209 t.Errorf("Map.Range() result mismatch (-want +got):\n%s", diff) 210 } 211 gotLen := m.Len() 212 if gotLen != len(want) { 213 t.Errorf("Map.Len() gotV = %v, want %v", gotLen, len(want)) 214 } 215 }) 216 } 217 } 218 219 // TODO: force example of an *allowed* repeat key from a range, such as: 220 // https://go.dev/play/p/y8kvkPoNCv_H 221 // We can't quite create that same pattern with the current TestMap_RangeAddDelete, 222 // including the bulk add doesn't happen after all the preceding add/deletes. 
// TestMap_RangeAddDelete mutates the map (deletes, single adds, and bulk adds)
// from inside a Range callback and verifies the Go-map-style iteration
// guarantees: no disallowed key is visited, no key repeats unless it was
// added-after-deleted during the iteration, and every surviving key is
// eventually seen.
func TestMap_RangeAddDelete(t *testing.T) {
	tests := []struct {
		name          string
		repeatAllowed bool // allow repeated key, such as if add X, del X, then add X while iterating
		capacity      int
		start         []Key
		del           []Key
		add           []Key
		addBulk       []Key // can be set up to trigger resize in middle of loop if desired
		addBulk2      []Key
		bulkIndex     int // loop index in Map range to do the addBulk
	}{
		{
			name:          "small",
			repeatAllowed: true, // this pattern could in theory trigger repeat key
			capacity:      16,
			start:         []Key{1, 2, 3, 4},
			del:           []Key{3, 4},
			add:           []Key{5, 6, 4, 7},
			addBulk:       nil,
			addBulk2:      nil,
			bulkIndex:     0,
		},
		{
			name:          "small with one grow",
			repeatAllowed: false,
			capacity:      8, // will be table len of 16
			start:         []Key{1, 2, 3, 4},
			del:           nil,
			add:           nil,
			addBulk:       list(5, 15, 1),
			addBulk2:      nil,
			bulkIndex:     0,
		},
		{
			name:          "small with two grows",
			repeatAllowed: false,
			capacity:      8, // will be table len of 16
			start:         []Key{1, 2, 3, 4},
			del:           nil,
			add:           nil,
			addBulk:       list(5, 30, 1),
			addBulk2:      nil,
			bulkIndex:     0,
		},
		{
			name:          "small, start iter mid-grow then grow",
			repeatAllowed: false,
			capacity:      8, // will be table len of 16
			start:         list(0, 53, 1),
			del:           nil,
			add:           nil,
			addBulk:       list(64, 128, 1),
			addBulk2:      nil,
			bulkIndex:     0,
		},
		{
			name:          "medium",
			repeatAllowed: true, // this pattern could in theory trigger repeat key
			capacity:      650,
			start:         list(0, 500, 1),
			del:           list(10, 400, 1),
			add:           list(500, 650, 1),
			addBulk:       list(10, 400, 1),
			addBulk2:      []Key{},
			bulkIndex:     400,
		},
		{
			name:          "medium, start iter mid-grow then grow",
			repeatAllowed: false,
			capacity:      8, // will be table len of 16
			start:         list(0, 417, 1), // trigger growth at 416
			del:           nil,
			add:           list(512, 950, 1),
			addBulk:       nil,
			addBulk2:      nil,
			bulkIndex:     415,
		},
		{
			name:          "medium, start iter mid-grow, overlapping writes during iter", // from fuzzing
			repeatAllowed: false,
			capacity:      48,
			start:         list(48, 102, 1), // 54 elems, grow starts at 52
			del:           nil,
			add:           nil,
			addBulk:       list(11, 119, 1), // 108 elems, some overlapping
			addBulk2:      nil,
			bulkIndex:     0,
		},
		{
			name:          "medium, two bulks adds",
			repeatAllowed: true, // this pattern could in theory trigger repeat key
			capacity:      650,
			start:         list(0, 300, 1),
			del:           list(0, 299, 1),
			add:           nil,
			addBulk:       list(1000, 1300, 1),
			addBulk2:      list(0, 300, 1),
			bulkIndex:     256 + 8,
		},
		{
			name:          "medium, no del",
			repeatAllowed: false,
			capacity:      650,
			start:         list(0, 500, 1),
			del:           nil,
			add:           list(500, 650, 1),
			addBulk:       list(10, 400, 1),
			addBulk2:      nil,
			bulkIndex:     400,
		},
		{
			name:          "medium, no add overlaps del",
			repeatAllowed: false,
			capacity:      650,
			start:         list(0, 500, 1),
			del:           list(10, 400, 1), // no add overlaps with what we delete
			add:           list(500, 650, 1),
			addBulk:       list(500, 800, 1),
			addBulk2:      nil,
			bulkIndex:     400,
		},
	}

	for _, tt := range tests {
		tt := tt // capture for t.Parallel below (pre-Go 1.22 loop-var semantics)
		for _, startCap := range []int{tt.capacity, 10, 20, 40, 52, 53, 54, 100, 1000} {
			t.Run(fmt.Sprintf("%s, start cap %d", tt.name, startCap), func(t *testing.T) {
				t.Parallel()
				for rep := 0; rep < *repFlag; rep++ {
					// Create the Map under test.
					m := New(startCap)
					// Vary the seed per repetition so randomized placement differs.
					m.seed = uintptr(rep)
					// TODO:
					// m.hashFunc = identityHash
					// m.hashFunc = zeroHash
					// TODO: TEMP. get into subtest name
					switch rep {
					case 0:
						m.hashFunc = identityHash
					case 1:
						// do this second (worse perf, even further from reality than identityHash)
						m.hashFunc = zeroHash
					default:
						m.hashFunc = hashUint64 // real hash
					}

					for _, key := range tt.start {
						m.Set(key, Value(key))
					}

					// Create some sets to dynamically track validity of keys that appear in a range
					allowed := newKeySet(tt.start) // tracks start + added - deleted; these keys allowed but not required
					mustSee := newKeySet(tt.start) // tracks start - deleted; these are keys we are required to see at some point
					seen := newKeySet(nil)         // use to verify no dups, and at end, used to verify mustSee
					// Also dynamically track if key X is added, deleted, and then re-added during iteration,
					// which means it is legal per Go spec to be seen again in the iteration.
					// Example with stdlib map repeating keys during iter: https://go.dev/play/p/RN-v8rmQmeE
					deleted := newKeySet(nil)
					addedAfterDeleted := newKeySet(nil)

					// during loop, verify no duplicate keys and we only see allowed keys.
					// after loop, verify that we saw everything that we were required to see.
					i := 0
					m.Range(func(key Key, value Value) bool {
						if seen.contains(key) {
							if !tt.repeatAllowed {
								t.Fatalf("Map.Range() key %v seen twice, unexpected for this test", key)
							}
							// Even though this pattern is generally allowed to have repeats,
							// verify this specific key has been added, then deleted, then added,
							// which means it is legal to see it later in the iteration after
							// being re-added.
							if !addedAfterDeleted.contains(key) {
								t.Fatalf("Map.Range() key %v seen twice and was not re-added after being deleted", key)
							}
						}
						seen.add(key)

						if !allowed.contains(key) {
							t.Fatalf("Map.Range() key %v seen but not allowed (e.g., might have been deleted, or never added)", key)
						}

						// Delete one key, if requested
						if i < len(tt.del) {
							k := tt.del[i]
							m.Delete(k)
							allowed.remove(k)
							mustSee.remove(k) // We are no longer required to see this... It's ok if we saw it earlier
							deleted.add(k)
							if addedAfterDeleted.contains(k) {
								addedAfterDeleted.remove(k)
							}
						}

						// set records an add in the tracking sets alongside the map write.
						set := func(k Key, v Value) {
							m.Set(k, v) // TODO: not checking values. maybe different test?
							allowed.add(k)
							if deleted.contains(k) {
								addedAfterDeleted.add(k)
								deleted.remove(k)
							}
						}
						// Add one key, if requested
						if i < len(tt.add) {
							set(tt.add[i], Value(i+1e6))
						}
						// Bulk add keys, if requested
						if i == tt.bulkIndex {
							for _, k := range tt.addBulk {
								set(k, Value(i+1e9))
							}
							for _, k := range tt.addBulk2 {
								set(k, Value(i+1e12))
							}
						}
						i++
						return true
					})

					for _, key := range mustSee.elems() {
						if !seen.contains(key) {
							dumpFixedTables(m)
							t.Fatalf("Map.Range() expected key %v not seen. table size: %d grows: %d",
								key, m.elemCount, m.resizeGenerations)
						}
					}

					if !tt.repeatAllowed && addedAfterDeleted.len() > 0 {
						// TODO: is this still working? Verified once
						// repeatAllowed could be inferred in theory,
						// but keep it as extra sanity check and to be more explicit on expectations
						t.Fatal("repeatAllowed incorrectly set to false")
					}
				}
			})
		}
	}
}

// TestMap_IterGrowAndDelete is modeled after TestIterGrowAndDelete
// from runtime/map_test.go.
func TestMap_IterGrowAndDelete(t *testing.T) {
	m := New(16) // will resize
	for i := 0; i < 100; i++ {
		m.Set(Key(i), Value(i))
	}
	growflag := true
	m.Range(func(key Key, value Value) bool {
		if growflag {
			// grow the table
			for i := 100; i < 1000; i++ {
				m.Set(Key(i), Value(i))
			}
			// delete all odd keys
			for i := 1; i < 1000; i += 2 {
				m.Delete(Key(i))
			}
			growflag = false
		} else {
			// After the first callback, no odd key should still be visited.
			if key&1 == 1 {
				t.Errorf("odd value returned %d", key)
			}
		}
		return true
	})
}

// TestMap_StoredKeys pins down the physical slot order of keys after
// delete/re-add sequences, using an all-zeros hash so placement is predictable.
func TestMap_StoredKeys(t *testing.T) {
	// TODO: probably make helper?
	list := func(start, end Key) []Key {
		var res []Key
		for i := start; i < end; i++ {
			res = append(res, i)
		}
		return res
	}

	storedKeys := func(m *Map) []Key {
		// reach into the implementation to return
		// keys in stored order
		if m.old != nil {
			panic("unexpectedly growing")
		}
		var keys []Key
		for i := range m.current.control {
			if isStored(m.current.control[i]) {
				keys = append(keys, m.current.slots[i].Key)
			}
		}
		return keys
	}

	tests := []struct {
		name     string
		capacity int
		start    []Key
		del      []Key
		add      []Key
		want     []Key
	}{
		{
			name:     "delete key, add different key, 1 group",
			capacity: 8, // ends up with 16 slots
			start:    []Key{0, 1, 2, 3},
			del:      []Key{2},
			add:      []Key{42}, // the slot that had 2 is replaced with 42
			want:     []Key{0, 1, 42, 3},
		},
		{
			name:     "delete key 1st group, add different key, 2 groups",
			capacity: 16,          // ends up with 32 slots
			start:    list(0, 20), // [0, 20)
			del:      []Key{2},
			add:      []Key{42}, // the DELETED slot that had 2 is replaced with 42
			want:     append([]Key{0, 1, 42}, list(3, 20)...),
		},
		{
			name:     "delete key 1st group, set key present in 2nd group",
			capacity: 16,          // ends up with 32 slots
			start:    list(0, 20), // [0, 20)
			del:      []Key{2},
			add:      []Key{19}, // should end up with single 19, still in the second group
			want:     append([]Key{0, 1}, list(3, 20)...),
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Create the Map under test.
			m := New(tt.capacity)

			// Reach into the implementation to force a terrible hash func,
			// which lets us more predictably place elems.
			hashToZero := func(k Key, seed uintptr) uint64 {
				// could do something like: return uint64(k << m.current.h2Shift)
				return 0
			}
			m.hashFunc = hashToZero

			// Apply our operations
			for _, key := range tt.start {
				m.Set(key, Value(key))
			}
			for _, key := range tt.del {
				m.Delete(key)
			}
			for _, key := range tt.add {
				m.Set(key, Value(key))
			}

			got := storedKeys(m)
			if diff := cmp.Diff(tt.want, got); diff != "" {
				t.Logf("got: %v", got)
				t.Errorf("stored keys mismatch (-want +got):\n%s", diff)
			}
		})
	}
}

// TestMap_ForceFill disables resizing and fills the underlying table to one
// slot short of 100%, then to exactly 100%, verifying probing still
// terminates and lookups still work at extreme load factors.
func TestMap_ForceFill(t *testing.T) {
	tests := []struct {
		elem KV
	}{
		{KV{Key: 1, Value: 2}},
		{KV{Key: 8, Value: 8}},
		{KV{Key: 1e6, Value: 1e10}},
	}

	for _, tt := range tests {
		t.Run(fmt.Sprintf("get key %d", tt.elem.Key), func(t *testing.T) {
			size := 10_000
			m := New(size)
			m.disableResizing = true

			// TODO: this is true for sparsehash, but not our swisstable,
			// which sizes the underlying table slices to roundPow2(1/0.8) times the requested capacity.
			// TODO: also, might no longer be true for sparestable, either.

			// TODO: reach in to disable growth?
			// We reach into the implementation to see what full means.
			underlyingTableLen := len(m.current.slots)
			t.Logf("setting %d elements in table with underlying size %d", underlyingTableLen-1, underlyingTableLen)

			// Force the underlying table to fill up the map so that it only has one empty slot left,
			// without any resizing. This helps verify our triangular numbers are correct and
			// we cycle properly. We also do this in a loop (so we set the same values repeatedly)
			// in order to slightly stress things a bit more.
			for i := 0; i < 100; i++ {
				for j := 1000; j < 1000+underlyingTableLen-1; j++ {
					m.Set(Key(j), Value(j))
				}
			}

			// Confirm it is nearly 100% full, with only room for one more
			gotLen := m.Len()
			if gotLen != underlyingTableLen-1 {
				t.Errorf("Map.Len gotLen = %v, want %v", gotLen, underlyingTableLen-1)
			}

			missingKey := Key(1e12)
			gotV, gotOk := m.Get(missingKey)
			if gotOk {
				t.Errorf("Map.Get(missingKey) gotOk = %v, want false", gotOk)
			}
			if gotV != 0 {
				t.Errorf("Map.Get(missingKey) gotV = %v, want %v", gotV, 0)
			}

			// Set one more value, which should make our table 100% full,
			// and confirm we can get it back.
			m.Set(tt.elem.Key, tt.elem.Value)
			gotV, gotOk = m.Get(tt.elem.Key)
			if !gotOk {
				t.Errorf("Map.Get(%d) gotOk = %v, want true", tt.elem.Key, gotOk)
			}
			if gotV != tt.elem.Value {
				t.Errorf("Map.Get(%d) gotV = %v, want %v", tt.elem.Key, gotV, tt.elem.Value)
			}

			// Confirm it is 100% full according to the public API.
			gotLen = m.Len()
			if gotLen != underlyingTableLen {
				t.Errorf("Map.Len gotLen = %v, want %v", gotLen, underlyingTableLen)
			}
			// Reach in to the impl and to confirm that it is indeed seem to be 100% full
			for i := 0; i < len(m.current.control); i++ {
				if m.current.control[i] == emptySentinel {
					t.Fatalf("control byte %d is empty", i)
				}
			}
			for i := 0; i < len(m.current.slots); i++ {
				if m.current.slots[i].Key == 0 || m.current.slots[i].Value == 0 {
					// We set everything to non-zero values above.
					t.Fatalf("element at index %d has key or value that is still 0: key = %d value = %d",
						i, m.current.slots[i].Key, m.current.slots[i].Value)
				}
			}
		})
	}
}

// Test_StatusByte verifies the three status-bit accessors are independent:
// setting one bit must not be observable via the other two predicates.
func Test_StatusByte(t *testing.T) {
	// probably/hopefully overkill
	b := byte(0)
	if isEvacuated(b) || isChainEvacuated(b) || curHasDisplaced(b) {
		t.Errorf("statusByte unexpectedly set")
	}

	got := setEvacuated(b)
	if !isEvacuated(got) {
		t.Errorf("isEvacuated() = false, got = %v", got)
	}
	if isChainEvacuated(got) {
		t.Errorf("isChainEvacuated() = true")
	}
	if curHasDisplaced(got) {
		t.Errorf("curHasDisplaced() = true")
	}

	got = setChainEvacuated(b)
	if isEvacuated(got) {
		t.Errorf("isEvacuated() = true")
	}
	if !isChainEvacuated(got) {
		t.Errorf("isChainEvacuated() = false, got = %v", got)
	}
	if curHasDisplaced(got) {
		t.Errorf("curHasDisplaced() = true")
	}

	got = setCurHasDisplaced(b)
	if isEvacuated(got) {
		t.Errorf("isEvacuated() = true")
	}
	if isChainEvacuated(got) {
		t.Errorf("isChainEvacuated() = true")
	}
	if !curHasDisplaced(got) {
		t.Errorf("curHasDisplaced() = false, got = %v", got)
	}
}

func BenchmarkMatchByte(b *testing.B) {
	buffer := make([]byte, 16)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, _ = MatchByte(42, buffer)
	}
}

// BenchmarkFillGrow_Swiss measures Set throughput starting from a tiny
// capacity so the table must grow repeatedly while filling.
func BenchmarkFillGrow_Swiss(b *testing.B) {
	bms := almostGrowPointMapSizes([]int{
		1 << 10,
		1 << 20,
		1 << 23,
	})
	if !*longTestFlag {
		bms = []benchmark{
			{"map size 1000000", 1_000_000},
		}
	}
	for _, bm := range bms {
		b.Run(bm.name, func(b *testing.B) {
			b.ReportAllocs()

			for i := 0; i < b.N; i++ {
				m := New(10)
				for j := Key(0); j < Key(bm.mapElements); j++ {
					m.Set(j, Value(j))
				}
			}
		})
	}
}

// BenchmarkFillGrow_Std is the stdlib-map counterpart of BenchmarkFillGrow_Swiss.
func BenchmarkFillGrow_Std(b *testing.B) {
	bms := almostGrowPointMapSizes([]int{
		1 << 10,
		1 << 20,
		1 << 23,
	})
	if !*longTestFlag {
		bms = []benchmark{
			{"map size 1000000", 1_000_000},
		}
	}
	for _, bm := range bms {
		b.Run(bm.name, func(b *testing.B) {
			b.ReportAllocs()

			for i := 0; i < b.N; i++ {
				m := make(map[int64]int64, 10)
				for j := int64(0); j < int64(bm.mapElements); j++ {
					m[j] = j
				}
			}
		})
	}
}

// BenchmarkFillPresize_Swiss measures Set throughput with the final
// capacity requested up front (no growth during fill).
func BenchmarkFillPresize_Swiss(b *testing.B) {
	bms := almostGrowPointMapSizes([]int{
		1 << 10,
		1 << 20,
		1 << 23,
	})
	if !*longTestFlag {
		bms = []benchmark{
			{"map size 1000000", 1_000_000},
		}
	}
	for _, bm := range bms {
		b.Run(bm.name, func(b *testing.B) {
			b.ReportAllocs()

			for i := 0; i < b.N; i++ {
				m := New(bm.mapElements)
				for j := Key(0); j < Key(bm.mapElements); j++ {
					m.Set(j, Value(j))
				}
			}
		})
	}
}

// BenchmarkFillPresize_Std is the stdlib-map counterpart of BenchmarkFillPresize_Swiss.
func BenchmarkFillPresize_Std(b *testing.B) {
	bms := almostGrowPointMapSizes([]int{
		1 << 10,
		1 << 20,
		1 << 23,
	})
	if !*longTestFlag {
		bms = []benchmark{
			{"map size 1000000", 1_000_000},
		}
	}
	for _, bm := range bms {
		b.Run(bm.name, func(b *testing.B) {
			b.ReportAllocs()

			for i := 0; i < b.N; i++ {
				m := make(map[int64]int64, bm.mapElements)
				for j := int64(0); j < int64(bm.mapElements); j++ {
					m[j] = j
				}
			}
		})
	}
}

// Sinks keep benchmarked results observable so the compiler cannot
// dead-code-eliminate the loop bodies.
// TODO: probably change over to sinkKey, sinkValue, and use map[Key]Value as the runtime maps
var sinkUint uint64
var sinkInt int64
var sinkValue Value
var sinkBool bool

// BenchmarkGetHitHot_Swiss measures Get for a small hot set of existing keys
// (cache-friendly lookups).
func BenchmarkGetHitHot_Swiss(b *testing.B) {
	hotKeyCount := 20
	lookupEachKey := 50

	bms := almostGrowPointMapSizes([]int{
		1 << 10,
		1 << 20,
		1 << 23,
	})
	if !*longTestFlag {
		bms = []benchmark{
			{"map size 1000000", 1_000_000},
		}
	}

	for _, bm := range bms {
		b.Run(bm.name, func(b *testing.B) {
			// Fill the map under test
			m := New(bm.mapElements)
			for i := Key(0); i < Key(bm.mapElements); i++ {
				m.Set(i, Value(i))
			}

			// Generate random hot keys repeated N times then shuffled
			var hotKeys []Key
			for i := 0; i < hotKeyCount; i++ {
				hotKeys = append(hotKeys, Key(rand.Intn(bm.mapElements)))
			}
			var gets []Key
			for i := 0; i < hotKeyCount; i++ {
				k := hotKeys[i]
				for j := 0; j < lookupEachKey; j++ {
					gets = append(gets, k)
				}
			}
			rand.Shuffle(len(gets), func(i, j int) {
				gets[i], gets[j] = gets[j], gets[i]
			})

			b.ReportAllocs()
			b.ResetTimer()

			for i := 0; i < b.N; i++ {
				for _, key := range gets {
					// NOTE(review): `b` here shadows *testing.B; harmless but
					// worth renaming at some point.
					v, b := m.Get(key)
					sinkInt = int64(v)
					sinkBool = b
				}
			}
		})
	}
}

// BenchmarkGetHitHot_Std is the stdlib-map counterpart of BenchmarkGetHitHot_Swiss.
func BenchmarkGetHitHot_Std(b *testing.B) {
	hotKeyCount := 20
	lookupEachKey := 50

	bms := almostGrowPointMapSizes([]int{
		1 << 10,
		1 << 20,
		1 << 23,
	})
	if !*longTestFlag {
		bms = []benchmark{
			{"map size 1000000", 1_000_000},
		}
	}

	for _, bm := range bms {
		b.Run(bm.name, func(b *testing.B) {
			// Fill the map under test
			m := make(map[int64]int64, bm.mapElements)
			for i := 0; i < bm.mapElements; i++ {
				m[int64(i)] = int64(i)
			}

			// Generate random hot keys repeated N times then shuffled
			var hotKeys []int64
			for i := 0; i < hotKeyCount; i++ {
				hotKeys = append(hotKeys, int64(rand.Intn(bm.mapElements)))
			}
			var gets []int64
			for i := 0; i < hotKeyCount; i++ {
				k := hotKeys[i]
				for j := 0; j < lookupEachKey; j++ {
					gets = append(gets, k)
				}
			}
			rand.Shuffle(len(gets), func(i, j int) {
				gets[i], gets[j] = gets[j], gets[i]
			})

			b.ReportAllocs()
			b.ResetTimer()

			for i := 0; i < b.N; i++ {
				for _, key := range gets {
					sinkInt, sinkBool = m[key]
				}
			}
		})
	}
}

// BenchmarkGetMissHot_Swiss measures Get for a small hot set of keys that are
// guaranteed absent (keys offset by 1<<40 are never inserted).
func BenchmarkGetMissHot_Swiss(b *testing.B) {
	hotKeyCount := 20
	lookupEachKey := 50

	bms := almostGrowPointMapSizes([]int{
		1 << 10,
		1 << 20,
		1 << 23,
	})
	if !*longTestFlag {
		bms = []benchmark{
			{"map size 1000000", 1_000_000},
		}
	}

	for _, bm := range bms {
		b.Run(bm.name, func(b *testing.B) {
			// Fill the map under test
			m := New(bm.mapElements)
			for i := Key(0); i < Key(bm.mapElements); i++ {
				m.Set(i, Value(i))
			}

			// Generate keys that don't exist, repeated N times then shuffled
			var missKeys []Key
			for i := 0; i < hotKeyCount; i++ {
				missKeys = append(missKeys, Key(i+(1<<40)))
			}
			var gets []Key
			for i := 0; i < hotKeyCount; i++ {
				k := missKeys[i]
				for j := 0; j < lookupEachKey; j++ {
					gets = append(gets, k)
				}
			}
			rand.Shuffle(len(gets), func(i, j int) {
				gets[i], gets[j] = gets[j], gets[i]
			})

			b.ReportAllocs()
			b.ResetTimer()

			for i := 0; i < b.N; i++ {
				for _, key := range gets {
					v, b := m.Get(key)
					sinkInt = int64(v)
					sinkBool = b
				}
			}
		})
	}
}

// BenchmarkGetMissHot_Std is the stdlib-map counterpart of BenchmarkGetMissHot_Swiss.
func BenchmarkGetMissHot_Std(b *testing.B) {
	hotKeyCount := 20
	lookupEachKey := 50

	bms := almostGrowPointMapSizes([]int{
		1 << 10,
		1 << 20,
		1 << 23,
	})
	if !*longTestFlag {
		bms = []benchmark{
			{"map size 1000000", 1_000_000},
		}
	}

	for _, bm := range bms {
		b.Run(bm.name, func(b *testing.B) {
			// Fill the map under test
			m := make(map[int64]int64, bm.mapElements)
			for i := 0; i < bm.mapElements; i++ {
				m[int64(i)] = int64(i)
			}

			// Generate keys that don't exist, repeated N times then shuffled
			var missKeys []int64
			for i := 0; i < hotKeyCount; i++ {
				missKeys = append(missKeys, int64(i+(1<<40)))
			}
			var gets []int64
			for i := 0; i < hotKeyCount; i++ {
				k := missKeys[i]
				for j := 0; j < lookupEachKey; j++ {
					gets = append(gets, k)
				}
			}
			rand.Shuffle(len(gets), func(i, j int) {
				gets[i], gets[j] = gets[j], gets[i]
			})

			b.ReportAllocs()
			b.ResetTimer()

			for i := 0; i < b.N; i++ {
				for _, key := range gets {
					sinkInt, sinkBool = m[key]
				}
			}
		})
	}
}

// BenchmarkGetAllStartCold_Std creates many maps so that they are
// cold at the start. It is intended to be run with -benchtime=1x.
func BenchmarkGetAllStartCold_Std(b *testing.B) {
	bms := almostGrowPointMapSizes([]int{
		1 << 10,
		1 << 20,
		1 << 23,
	})
	if !*longTestFlag {
		bms = []benchmark{
			{"map size 1000000", 1_000_000},
		}
	}

	for _, bm := range bms {
		b.Run(bm.name, func(b *testing.B) {
			minMem := *coldMemTestFlag * (1 << 20)

			// we don't use overhead to keep the count of maps consistent
			// across different implementations
			mapMem := float64(bm.mapElements) * 16
			mapCnt := int(math.Ceil(minMem / mapMem))

			keys := make([]int64, bm.mapElements)
			for i := int64(0); i < int64(len(keys)); i++ {
				keys[i] = i
			}

			b.Logf("creating %d maps with %.1f MB of data. %d total keys", mapCnt, float64(mapCnt)*mapMem/(1<<20), mapCnt*bm.mapElements)
			maps := make([]map[int64]int64, mapCnt)
			for i := 0; i < mapCnt; i++ {
				m := make(map[int64]int64, bm.mapElements)
				for j := int64(0); j < int64(bm.mapElements); j++ {
					m[j] = j
				}
				maps[i] = m
			}

			// Shuffle the keys after we have placed them in the maps.
			// Otherwise, we could favor early entrants in a given bucket when reading below.
			rand.Shuffle(len(keys), func(i, j int) {
				keys[i], keys[j] = keys[j], keys[i]
			})

			getKeys := func(m map[int64]int64, ratio float64) {
				count := int(ratio * float64(bm.mapElements))
				for _, k := range keys {
					if count == 0 {
						break
					}
					count--
					sinkInt, sinkBool = m[k]
				}
			}

			b.ResetTimer()

			for i := 0; i < b.N; i++ {
				// We keep the same order of maps and keys in their respective slices
				// so that any given map or key is more likely to be cold by the time we
				// cycle back around if b.N is > 1. In practice, b.N seems to usually be 1.
				for _, m := range maps {
					getKeys(m, 1.0)
				}
			}
		})
	}
}

// BenchmarkGetAllStartCold_Swiss creates many maps so that they are
// cold at the start. It is intended to be run with -benchtime=1x.
func BenchmarkGetAllStartCold_Swiss(b *testing.B) {
	bms := almostGrowPointMapSizes([]int{
		1 << 10,
		1 << 20,
		1 << 23,
	})
	if !*longTestFlag {
		bms = []benchmark{
			{"map size 1000000", 1_000_000},
		}
	}

	for _, bm := range bms {
		b.Run(bm.name, func(b *testing.B) {
			minMem := *coldMemTestFlag * (1 << 20)

			// we don't use overhead to keep the count of maps consistent
			// across different implementations
			mapMem := float64(bm.mapElements) * 16
			mapCnt := int(math.Ceil(minMem / mapMem))

			keys := make([]Key, bm.mapElements)
			for i := 0; i < len(keys); i++ {
				keys[i] = Key(i)
			}

			b.Logf("creating %d maps with %.1f MB of data. %d total keys", mapCnt, float64(mapCnt)*mapMem/(1<<20), mapCnt*bm.mapElements)
			maps := make([]*Map, mapCnt)
			for i := 0; i < mapCnt; i++ {
				m := New(bm.mapElements)
				for j := 0; j < bm.mapElements; j++ {
					m.Set(Key(j), Value(j))
				}
				maps[i] = m
			}

			// Shuffle the keys after we have placed them in the maps.
			// Otherwise, we could favor early entrants in a given bucket when reading below.
			rand.Shuffle(len(keys), func(i, j int) {
				keys[i], keys[j] = keys[j], keys[i]
			})

			getKeys := func(m *Map, ratio float64) {
				count := int(ratio * float64(bm.mapElements))
				for _, k := range keys {
					if count == 0 {
						break
					}
					count--
					v, b := m.Get(Key(k))
					sinkValue = v
					sinkBool = b
				}
			}

			b.ResetTimer()

			for i := 0; i < b.N; i++ {
				// We keep the same order of maps and keys in their respective slices
				// so that any given map or key is more likely to be cold by the time we
				// cycle back around if b.N is > 1. In practice, b.N seems to usually be 1.
				for _, m := range maps {
					getKeys(m, 1.0)
				}
			}
		})
	}
}

// iterStd sums all values of a stdlib map; noinline keeps the iteration
// from being optimized away or folded into the benchmark loop.
//
//go:noinline
func iterStd(m map[int64]int64) int64 {
	var ret int64
	for _, a := range m {
		ret += a
	}
	return ret
}

func BenchmarkRange_Std(b *testing.B) {
	// From https://github.com/golang/go/issues/51410, but with int64 rather than strings.
	// That should mean the hashing impact is less here.
	minSize := 51 // was 50
	maxSize := 58 // was 60
	for size := minSize; size < maxSize; size++ {
		b.Run(fmt.Sprintf("map_size_%d", size), func(b *testing.B) {
			m := make(map[int64]int64)
			for i := 0; i < size; i++ {
				m[int64(i)] = int64(i)
			}
			// NOTE(review): x is never consumed; consider assigning it to
			// sinkInt after the loop to guard against dead-code elimination.
			var x int64
			for i := 0; i < b.N; i++ {
				x += iterStd(m)
			}
		})
	}
}

// iterSwiss sums all values of a Map via Range; noinline mirrors iterStd.
//
//go:noinline
func iterSwiss(m *Map) int64 {
	var ret int64
	m.Range(func(key Key, value Value) bool {
		ret += int64(value)
		return true
	})
	return ret
}

func BenchmarkRange_Swiss(b *testing.B) {
	// From https://github.com/golang/go/issues/51410, but with int64 rather than strings.
	// That should mean the hashing impact is less here.
	minSize := 51 // was 50
	maxSize := 58 // was 60
	for size := minSize; size < maxSize; size++ {
		b.Run(fmt.Sprintf("map_size_%d", size), func(b *testing.B) {
			m := New(10)
			for i := 0; i < size; i++ {
				m.Set(Key(i), Value(i))
			}
			// NOTE(review): x is never consumed; see BenchmarkRange_Std.
			var x int64
			for i := 0; i < b.N; i++ {
				x += iterSwiss(m)
			}
		})
	}
}

// benchmark names one benchmark case: a subtest name and the element count.
type benchmark struct {
	name        string
	mapElements int
}

// coarseMapSizes returns a []benchmark with large steps from 1K to 100M elements
func coarseMapSizes() []benchmark {
	const (
		mapSizeCoarseLow    = 1_000
		mapSizeCoarseHigh   = 100_000_000 // 16 bytes * 1e8 = 1.6 GB of data, plus overhead
		mapSizeCoarseFactor = 1.75
	)

	var bms []benchmark
	mapSize := mapSizeCoarseLow
	for {
		mapSize = min(mapSize, mapSizeCoarseHigh)
		bms = append(bms, benchmark{fmt.Sprintf("map size %d", mapSize), mapSize})
		if mapSize == mapSizeCoarseHigh {
			break
		}
		mapSize = int(float64(mapSize) * mapSizeCoarseFactor)
	}
	return bms
}

// almostGrowPointMapSizes returns, for each power-of-two table size, two
// benchmark sizes bracketing the load-factor grow point (80% and 120% of it).
func almostGrowPointMapSizes(pow2s []int) []benchmark {
	var bms []benchmark
	for _, size := range pow2s {
		if size&(size-1) != 0 || size == 0 {
			panic(fmt.Sprintf("bad test setup, size %d is not power of 2", size))
		}
		growPoint := (size * 13 / 2 / 8) + 1 // TODO: centralize

		s1 := int(0.8 * float64(growPoint))
		s2 := int(1.2 * float64(growPoint))

		bms = append(bms, benchmark{name: fmt.Sprintf("map size %d", s1), mapElements: s1})
		bms = append(bms, benchmark{name: fmt.Sprintf("map size %d", s2), mapElements: s2})
	}
	return bms
}

// fineMapSizes returns a []benchmark with smaller steps from 400K elements to 2.4M elements
func fineMapSizes() []benchmark {
	const (
		mapSizeFineLow  = 400_000
		mapSizeFineHigh = 2_400_000
		mapSizeFineStep = 50_000
	)

	var bms []benchmark
	mapSize := mapSizeFineLow
	for {
		mapSize = min(mapSize, mapSizeFineHigh)
		bms = append(bms, benchmark{fmt.Sprintf("map size %d", mapSize), mapSize})
		if mapSize == mapSizeFineHigh {
			break
		}
		mapSize += mapSizeFineStep
	}
	return bms
}

// sweepMapSizes returns sizes from 800K to 4.4M in ~1% steps, additionally
// inserting the three sizes straddling each expected resize trigger point.
func sweepMapSizes() []benchmark {
	const (
		mapSizeSweepLow    = 800_000
		mapSizeSweepHigh   = 4_400_000
		mapSizeSweepFactor = 1.01
	)

	var bms []benchmark
	mapSize := mapSizeSweepLow
	for {
		mapSize = min(mapSize, mapSizeSweepHigh)
		bms = append(bms, benchmark{fmt.Sprintf("map size %d", mapSize), mapSize})
		if mapSize == mapSizeSweepHigh {
			break
		}
		// fmt.Println("mapSize", mapSize)
		// fmt.Println("nextWorstCase", nextWorstCase)
		nextMapSize := int(float64(mapSize) * mapSizeSweepFactor)

		// insert the expected worse case for memory usage at the size just before a resize would be triggered.
		// also include the two points immediately around that, where the point just before is the
		// expected best case for memory usage.
		nextWorstCase := (roundPow2(mapSize) * 13 / 2 / 8) + 1
		if mapSize < nextWorstCase && nextWorstCase < nextMapSize {
			bms = append(bms, benchmark{fmt.Sprintf("map size %d", nextWorstCase-1), nextWorstCase - 1})
			bms = append(bms, benchmark{fmt.Sprintf("map size %d", nextWorstCase), nextWorstCase})
			bms = append(bms, benchmark{fmt.Sprintf("map size %d", nextWorstCase+1), nextWorstCase + 1})
		}
		mapSize = nextMapSize
	}
	return bms
}

// roundPow2 returns the smallest power of two >= n (0 for 0).
func roundPow2(n int) int {
	if n == 0 {
		return 0
	}
	return 1 << (64 - bits.LeadingZeros64(uint64(n-1)))
}

func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// zeroHash is a terrible hash function that is reproducible
// and can help trigger corner cases.
1339 func zeroHash(k Key, seed uintptr) uint64 { 1340 // could do something like: return uint64(k << m.current.h2Shift) 1341 return 0 1342 } 1343 1344 // identityHash is another terrible hash function, but not as bad 1345 // as zeroHash. 1346 func identityHash(k Key, seed uintptr) uint64 { 1347 return uint64(k) 1348 } 1349 1350 func dumpFixedTables(m *Map) { 1351 tables := []struct { 1352 name string 1353 t *fixedTable 1354 }{ 1355 {"current", &m.current}, 1356 {"old", m.old}, 1357 } 1358 for _, t := range tables { 1359 fmt.Println("\n===", t.name, "===") 1360 if t.t == nil { 1361 fmt.Println("table is nil") 1362 return 1363 } 1364 for i := range t.t.slots { 1365 if i%16 == 0 { 1366 fmt.Println() 1367 fmt.Println(t.name, "group", i/16) 1368 fmt.Println("-----") 1369 } 1370 fmt.Printf("%08b %v\n", t.t.control[i], t.t.slots[i]) 1371 } 1372 } 1373 } 1374 1375 // list returns a slice of of keys based on start (inclusive), end (exclusive), and stride 1376 func list(start, end, stride Key) []Key { 1377 var res []Key 1378 for i := start; i < end; i += stride { 1379 res = append(res, i) 1380 } 1381 return res 1382 } 1383 1384 // keysAndValues collects keys and values from a Map into a runtime map 1385 // for use in testing and fuzzing. 1386 // It panics if the same key is observed twice while iterating over the keys. 
1387 func keysAndValues(m *Map) map[Key]Value { 1388 res := make(map[Key]Value) 1389 m.Range(func(key Key, value Value) bool { 1390 // validate we don't see the same key twice 1391 _, ok := res[key] 1392 if ok { 1393 panic(fmt.Sprintf("Map.Range() key %v seen before", key)) 1394 } 1395 res[key] = value 1396 return true 1397 }) 1398 return res 1399 } 1400 1401 // keySet is a simple set to aid with valiation 1402 type keySet struct { 1403 m map[Key]struct{} 1404 } 1405 1406 func newKeySet(elems []Key) *keySet { 1407 s := &keySet{} 1408 s.m = make(map[Key]struct{}) 1409 for _, k := range elems { 1410 s.add(k) 1411 } 1412 return s 1413 } 1414 1415 func (s *keySet) add(k Key) { 1416 s.m[k] = struct{}{} 1417 } 1418 1419 func (s *keySet) remove(k Key) { 1420 delete(s.m, k) 1421 } 1422 1423 func (s *keySet) contains(k Key) bool { 1424 _, ok := s.m[k] 1425 return ok 1426 } 1427 1428 func (s *keySet) len() int { 1429 return len(s.m) 1430 } 1431 1432 func (s *keySet) elems() []Key { 1433 var keys []Key 1434 for key := range s.m { 1435 keys = append(keys, key) 1436 } 1437 sort.Slice(keys, func(i, j int) bool { 1438 return keys[i] < keys[j] 1439 }) 1440 return keys 1441 } 1442 1443 func Test_fixedTable_reconstructHash(t *testing.T) { 1444 tests := []struct { 1445 capacity int // must be power of 2 1446 }{ 1447 {16}, {1 << 6}, {1 << 17}, 1448 } 1449 for _, tt := range tests { 1450 t.Run(fmt.Sprintf("capacity %d", tt.capacity), func(t *testing.T) { 1451 table := newFixedTable(tt.capacity) 1452 for i := 0; i < 100; i++ { 1453 hash := hashUint64(Key(0), uintptr(i)) 1454 group := hash & table.groupMask 1455 h2 := table.h2(hash) 1456 1457 usefulPortionMask := (1 << (table.h2Shift + 7)) - 1 1458 usefulHash := hash & uint64(usefulPortionMask) 1459 1460 if got := table.reconstructHash(h2, group); got != usefulHash { 1461 t.Fatalf("fixedTable.reconstructHash() = 0x%X, want 0x%X", got, usefulHash) 1462 } 1463 } 1464 }) 1465 } 1466 }