github.com/fiatjaf/generic-ristretto@v0.0.1/z/btree_test.go (about) 1 /* 2 * Copyright 2020 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package z 18 19 import ( 20 "fmt" 21 "io/ioutil" 22 "math" 23 "math/rand" 24 "os" 25 "path/filepath" 26 "sort" 27 "testing" 28 "time" 29 30 "github.com/fiatjaf/generic-ristretto/z/simd" 31 "github.com/dustin/go-humanize" 32 "github.com/stretchr/testify/require" 33 ) 34 35 var tmp int 36 37 func setPageSize(sz int) { 38 pageSize = sz 39 maxKeys = (pageSize / 16) - 1 40 } 41 42 func TestTree(t *testing.T) { 43 bt := NewTree("TestTree") 44 defer func() { require.NoError(t, bt.Close()) }() 45 46 N := uint64(256 * 256) 47 for i := uint64(1); i < N; i++ { 48 bt.Set(i, i) 49 } 50 for i := uint64(1); i < N; i++ { 51 require.Equal(t, i, bt.Get(i)) 52 } 53 54 bt.DeleteBelow(100) 55 for i := uint64(1); i < 100; i++ { 56 require.Equal(t, uint64(0), bt.Get(i)) 57 } 58 for i := uint64(100); i < N; i++ { 59 require.Equal(t, i, bt.Get(i)) 60 } 61 } 62 63 func TestTreePersistent(t *testing.T) { 64 dir, err := ioutil.TempDir("", "") 65 require.NoError(t, err) 66 defer os.RemoveAll(dir) 67 path := filepath.Join(dir, "tree.buf") 68 69 // Create a tree and validate the data. 70 bt1, err := NewTreePersistent(path) 71 require.NoError(t, err) 72 N := uint64(64 << 10) 73 for i := uint64(1); i < N; i++ { 74 bt1.Set(i, i*2) 75 } 76 for i := uint64(1); i < N; i++ { 77 require.Equal(t, i*2, bt1.Get(i)) 78 } 79 bt1Stats := bt1.Stats() 80 require.NoError(t, bt1.Close()) 81 82 // Reopen tree and validate the data. 83 bt2, err := NewTreePersistent(path) 84 require.NoError(t, err) 85 require.Equal(t, bt2.freePage, bt1.freePage) 86 require.Equal(t, bt2.nextPage, bt1.nextPage) 87 bt2Stats := bt2.Stats() 88 // When reopening a tree, the allocated size becomes the file size. 89 // We don't need to compare this, it doesn't change anything in the tree. 90 bt2Stats.Allocated = bt1Stats.Allocated 91 require.Equal(t, bt1Stats, bt2Stats) 92 for i := uint64(1); i < N; i++ { 93 require.Equal(t, i*2, bt2.Get(i)) 94 } 95 // Delete all the data. This will change the value of bt.freePage. 96 bt2.DeleteBelow(math.MaxUint64) 97 bt2Stats = bt2.Stats() 98 require.NoError(t, bt2.Close()) 99 100 // Reopen tree and validate the data. 101 bt3, err := NewTreePersistent(path) 102 require.NoError(t, err) 103 require.Equal(t, bt2.freePage, bt3.freePage) 104 require.Equal(t, bt2.nextPage, bt3.nextPage) 105 bt3Stats := bt3.Stats() 106 bt3Stats.Allocated = bt2Stats.Allocated 107 require.Equal(t, bt2Stats, bt3Stats) 108 require.NoError(t, bt3.Close()) 109 } 110 111 func TestTreeBasic(t *testing.T) { 112 setAndGet := func() { 113 bt := NewTree("TestTreeBasic") 114 defer func() { require.NoError(t, bt.Close()) }() 115 116 N := uint64(1 << 20) 117 mp := make(map[uint64]uint64) 118 for i := uint64(1); i < N; i++ { 119 key := uint64(rand.Int63n(1<<60) + 1) 120 bt.Set(key, key) 121 mp[key] = key 122 } 123 for k, v := range mp { 124 require.Equal(t, v, bt.Get(k)) 125 } 126 127 stats := bt.Stats() 128 t.Logf("final stats: %+v\n", stats) 129 } 130 setAndGet() 131 defer setPageSize(os.Getpagesize()) 132 setPageSize(16 << 5) 133 setAndGet() 134 } 135 136 func TestTreeReset(t *testing.T) { 137 bt := NewTree("TestTreeReset") 138 defer func() { require.NoError(t, bt.Close()) }() 139 140 N := 1 << 10 141 val := rand.Uint64() 142 for i := 0; i < N; i++ { 143 bt.Set(rand.Uint64(), val) 144 } 145 146 // Truncate it to small size that is less than pageSize. 147 bt.Reset() 148 149 stats := bt.Stats() 150 // Verify the tree stats. 151 require.Equal(t, 2, stats.NumPages) 152 require.Equal(t, 1, stats.NumLeafKeys) 153 require.Equal(t, 2*pageSize, stats.Bytes) 154 expectedOcc := float64(1) * 100 / float64(2*maxKeys) 155 require.InDelta(t, expectedOcc, stats.Occupancy, 0.01) 156 require.Zero(t, stats.NumPagesFree) 157 // Check if we can reinsert the data. 158 mp := make(map[uint64]uint64) 159 for i := 0; i < N; i++ { 160 k := rand.Uint64() 161 mp[k] = val 162 bt.Set(k, val) 163 } 164 for k, v := range mp { 165 require.Equal(t, v, bt.Get(k)) 166 } 167 } 168 169 func TestTreeCycle(t *testing.T) { 170 bt := NewTree("TestTreeCycle") 171 defer func() { require.NoError(t, bt.Close()) }() 172 173 val := uint64(0) 174 for i := 0; i < 16; i++ { 175 for j := 0; j < 1e6+i*1e4; j++ { 176 val += 1 177 bt.Set(rand.Uint64(), val) 178 } 179 before := bt.Stats() 180 bt.DeleteBelow(val - 1e4) 181 after := bt.Stats() 182 t.Logf("Cycle %d Done. Before: %+v -> After: %+v\n", i, before, after) 183 } 184 185 bt.DeleteBelow(val) 186 stats := bt.Stats() 187 t.Logf("stats: %+v\n", stats) 188 require.LessOrEqual(t, stats.Occupancy, 1.0) 189 require.GreaterOrEqual(t, stats.NumPagesFree, int(float64(stats.NumPages)*0.95)) 190 } 191 192 func TestTreeIterateKV(t *testing.T) { 193 bt := NewTree("TestTreeIterateKV") 194 defer func() { require.NoError(t, bt.Close()) }() 195 196 // Set entries: (i, i*10) 197 const n = uint64(1 << 20) 198 for i := uint64(1); i <= n; i++ { 199 bt.Set(i, i*10) 200 } 201 202 // Validate entries: (i, i*10) 203 // Set entries: (i, i*20) 204 count := uint64(0) 205 bt.IterateKV(func(k, v uint64) uint64 { 206 require.Equal(t, k*10, v) 207 count++ 208 return k * 20 209 }) 210 require.Equal(t, n, count) 211 212 // Validate entries: (i, i*20) 213 count = uint64(0) 214 bt.IterateKV(func(k, v uint64) uint64 { 215 require.Equal(t, k*20, v) 216 count++ 217 return 0 218 }) 219 require.Equal(t, n, count) 220 } 221 222 func TestOccupancyRatio(t *testing.T) { 223 // atmax 4 keys per node 224 setPageSize(16 * 5) 225 defer setPageSize(os.Getpagesize()) 226 require.Equal(t, 4, maxKeys) 227 228 bt := NewTree("TestOccupancyRatio") 229 defer func() { require.NoError(t, bt.Close()) }() 230 231 expectedRatio := float64(1) * 100 / float64(2*maxKeys) // 2 because we'll have 2 pages. 232 stats := bt.Stats() 233 t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats) 234 require.InDelta(t, expectedRatio, stats.Occupancy, 0.01) 235 for i := uint64(1); i <= 3; i++ { 236 bt.Set(i, i) 237 } 238 // Tree structure will be: 239 // [2,Max,_,_] 240 // [1,2,_,_] [3,Max,_,_] 241 expectedRatio = float64(4) * 100 / float64(3*maxKeys) 242 stats = bt.Stats() 243 t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats) 244 require.InDelta(t, expectedRatio, stats.Occupancy, 0.01) 245 bt.DeleteBelow(2) 246 // Tree structure will be: 247 // [2,Max,_] 248 // [2,_,_,_] [3,Max,_,_] 249 expectedRatio = float64(3) * 100 / float64(3*maxKeys) 250 stats = bt.Stats() 251 t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats) 252 require.InDelta(t, expectedRatio, stats.Occupancy, 0.01) 253 } 254 255 func TestNode(t *testing.T) { 256 n := getNode(make([]byte, pageSize)) 257 for i := uint64(1); i < 16; i *= 2 { 258 n.set(i, i) 259 } 260 n.print(0) 261 require.True(t, 0 == n.get(5)) 262 n.set(5, 5) 263 n.print(0) 264 265 n.setBit(0) 266 require.False(t, n.isLeaf()) 267 n.setBit(bitLeaf) 268 require.True(t, n.isLeaf()) 269 } 270 271 func TestNodeBasic(t *testing.T) { 272 n := getNode(make([]byte, pageSize)) 273 N := uint64(256) 274 mp := make(map[uint64]uint64) 275 for i := uint64(1); i < N; i++ { 276 key := uint64(rand.Int63n(1<<60) + 1) 277 n.set(key, key) 278 mp[key] = key 279 } 280 for k, v := range mp { 281 require.Equal(t, v, n.get(k)) 282 } 283 } 284 285 func TestNode_MoveRight(t *testing.T) { 286 n := getNode(make([]byte, pageSize)) 287 N := uint64(10) 288 for i := uint64(1); i < N; i++ { 289 n.set(i, i) 290 } 291 n.moveRight(5) 292 n.iterate(func(n node, i int) { 293 if i < 5 { 294 require.Equal(t, uint64(i+1), n.key(i)) 295 require.Equal(t, uint64(i+1), n.val(i)) 296 } else if i > 5 { 297 require.Equal(t, uint64(i), n.key(i)) 298 require.Equal(t, uint64(i), n.val(i)) 299 } 300 }) 301 } 302 303 func TestNodeCompact(t *testing.T) { 304 n := getNode(make([]byte, pageSize)) 305 n.setBit(bitLeaf) 306 N := uint64(128) 307 mp := make(map[uint64]uint64) 308 for i := uint64(1); i < N; i++ { 309 key := i 310 val := uint64(10) 311 if i%2 == 0 { 312 val = 20 313 mp[key] = 20 314 } 315 n.set(key, val) 316 } 317 318 require.Equal(t, int(N/2), n.compact(11)) 319 for k, v := range mp { 320 require.Equal(t, v, n.get(k)) 321 } 322 require.Equal(t, uint64(127), n.maxKey()) 323 } 324 325 func BenchmarkPurge(b *testing.B) { 326 N := 16 << 20 327 b.Run("go-mem", func(b *testing.B) { 328 m := make(map[uint64]uint64) 329 for i := 0; i < N; i++ { 330 m[rand.Uint64()] = uint64(i) 331 } 332 }) 333 334 b.Run("btree", func(b *testing.B) { 335 start := time.Now() 336 bt := NewTree("BenchmarkPurge") 337 defer func() { require.NoError(b, bt.Close()) }() 338 for i := 0; i < N; i++ { 339 bt.Set(rand.Uint64(), uint64(i)) 340 } 341 b.Logf("Populate took: %s. stats: %+v\n", time.Since(start), bt.Stats()) 342 343 start = time.Now() 344 before := bt.Stats() 345 bt.DeleteBelow(uint64(N - 1<<20)) 346 after := bt.Stats() 347 b.Logf("Purge took: %s. Before: %+v After: %+v\n", time.Since(start), before, after) 348 }) 349 } 350 351 func BenchmarkWrite(b *testing.B) { 352 b.Run("map", func(b *testing.B) { 353 mp := make(map[uint64]uint64) 354 for n := 0; n < b.N; n++ { 355 k := rand.Uint64() 356 mp[k] = k 357 } 358 }) 359 b.Run("btree", func(b *testing.B) { 360 bt := NewTree("BenchmarkWrite") 361 defer func() { require.NoError(b, bt.Close()) }() 362 b.ResetTimer() 363 for n := 0; n < b.N; n++ { 364 k := rand.Uint64() 365 bt.Set(k, k) 366 } 367 }) 368 } 369 370 // goos: linux 371 // goarch: amd64 372 // pkg: github.com/fiatjaf/generic-ristretto/z 373 // BenchmarkRead/map-4 10845322 109 ns/op 374 // BenchmarkRead/btree-4 2744283 430 ns/op 375 // Cumulative for 10 runs. 376 // name time/op 377 // Read/map-4 105ns ± 1% 378 // Read/btree-4 422ns ± 1% 379 func BenchmarkRead(b *testing.B) { 380 N := 10 << 20 381 mp := make(map[uint64]uint64) 382 for i := 0; i < N; i++ { 383 k := uint64(rand.Intn(2*N)) + 1 384 mp[k] = k 385 } 386 b.Run("map", func(b *testing.B) { 387 for i := 0; i < b.N; i++ { 388 k := uint64(rand.Intn(2 * N)) 389 v, ok := mp[k] 390 _, _ = v, ok 391 } 392 }) 393 394 bt := NewTree("BenchmarkRead") 395 defer func() { require.NoError(b, bt.Close()) }() 396 for i := 0; i < N; i++ { 397 k := uint64(rand.Intn(2*N)) + 1 398 bt.Set(k, k) 399 } 400 stats := bt.Stats() 401 fmt.Printf("Num pages: %d Size: %s\n", stats.NumPages, 402 humanize.IBytes(uint64(stats.Bytes))) 403 fmt.Println("Writes done.") 404 405 b.Run("btree", func(b *testing.B) { 406 for i := 0; i < b.N; i++ { 407 k := uint64(rand.Intn(2*N)) + 1 408 v := bt.Get(k) 409 _ = v 410 } 411 }) 412 } 413 414 func BenchmarkSearch(b *testing.B) { 415 linear := func(n node, k uint64, N int) int { 416 for i := 0; i < N; i++ { 417 if ki := n.key(i); ki >= k { 418 return i 419 } 420 } 421 return N 422 } 423 binary := func(n node, k uint64, N int) int { 424 return sort.Search(N, func(i int) bool { 425 return n.key(i) >= k 426 }) 427 } 428 unroll4 := func(n node, k uint64, N int) int { 429 if len(n[:2*N]) < 8 { 430 for i := 0; i < N; i++ { 431 if ki := n.key(i); ki >= k { 432 return i 433 } 434 } 435 return N 436 } 437 return int(simd.Search(n[:2*N], k)) 438 } 439 440 jumpBy := []int{8, 16, 32, 64, 128, 196, 255} 441 for _, sz := range jumpBy { 442 f, err := ioutil.TempFile(".", "tree") 443 require.NoError(b, err) 444 445 mf, err := OpenMmapFileUsing(f, pageSize, true) 446 if err != NewFile { 447 require.NoError(b, err) 448 } 449 450 n := getNode(mf.Data) 451 for i := 1; i <= sz; i++ { 452 n.set(uint64(i), uint64(i)) 453 } 454 455 b.Run(fmt.Sprintf("linear-%d", sz), func(b *testing.B) { 456 for i := 0; i < b.N; i++ { 457 tmp = linear(n, math.MaxUint64, sz) 458 } 459 }) 460 b.Run(fmt.Sprintf("binary-%d", sz), func(b *testing.B) { 461 for i := 0; i < b.N; i++ { 462 tmp = binary(n, uint64(sz), sz) 463 } 464 }) 465 b.Run(fmt.Sprintf("unrolled-asm-%d", sz), func(b *testing.B) { 466 for i := 0; i < b.N; i++ { 467 tmp = unroll4(n, math.MaxUint64, sz) 468 } 469 }) 470 mf.Close(0) 471 os.Remove(f.Name()) 472 } 473 } 474 475 // This benchmark when run on dgus-delta, performed marginally better with threshold=32. 476 // CustomSearch/sz-64_th-1-4 49.9ns ± 1% (fully binary) 477 // CustomSearch/sz-64_th-16-4 63.3ns ± 0% 478 // CustomSearch/sz-64_th-32-4 58.7ns ± 7% 479 // CustomSearch/sz-64_th-64-4 63.9ns ± 7% (fully linear) 480 481 // CustomSearch/sz-128_th-32-4 70.2ns ± 1% 482 483 // CustomSearch/sz-255_th-1-4 77.3ns ± 0% (fully binary) 484 // CustomSearch/sz-255_th-16-4 68.2ns ± 1% 485 // CustomSearch/sz-255_th-32-4 67.0ns ± 7% 486 // CustomSearch/sz-255_th-64-4 85.5ns ±19% 487 // CustomSearch/sz-255_th-256-4 129ns ± 6% (fully linear) 488 489 func BenchmarkCustomSearch(b *testing.B) { 490 mixed := func(n node, k uint64, N int, threshold int) int { 491 lo, hi := 0, N 492 // Reduce the search space using binary seach and then do linear search. 493 for hi-lo > threshold { 494 mid := (hi + lo) / 2 495 km := n.key(mid) 496 if k == km { 497 return mid 498 } 499 if k > km { 500 // key is greater than the key at mid, so move right. 501 lo = mid + 1 502 } else { 503 // else move left. 504 hi = mid 505 } 506 } 507 for i := lo; i <= hi; i++ { 508 if ki := n.key(i); ki >= k { 509 return i 510 } 511 } 512 return N 513 } 514 515 for _, sz := range []int{64, 128, 255} { 516 n := getNode(make([]byte, pageSize)) 517 for i := 1; i <= sz; i++ { 518 n.set(uint64(i), uint64(i)) 519 } 520 521 mk := sz + 1 522 for th := 1; th <= sz+1; th *= 2 { 523 b.Run(fmt.Sprintf("sz-%d th-%d", sz, th), func(b *testing.B) { 524 for i := 0; i < b.N; i++ { 525 k := uint64(rand.Intn(mk)) 526 tmp = mixed(n, k, sz, th) 527 } 528 }) 529 } 530 } 531 }