github.com/etecs-ru/ristretto@v0.9.1/z/btree_test.go (about) 1 /* 2 * Copyright 2020 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package z 18 19 import ( 20 "fmt" 21 "math" 22 "math/rand" 23 "os" 24 "path/filepath" 25 "sort" 26 "testing" 27 "time" 28 29 "github.com/dustin/go-humanize" 30 "github.com/etecs-ru/ristretto/z/simd" 31 "github.com/stretchr/testify/require" 32 ) 33 34 var tmp int 35 36 func setPageSize(sz int) { 37 pageSize = sz 38 maxKeys = (pageSize / 16) - 1 39 } 40 41 func TestTree(t *testing.T) { 42 bt := NewTree("TestTree") 43 defer func() { require.NoError(t, bt.Close()) }() 44 45 N := uint64(256 * 256) 46 for i := uint64(1); i < N; i++ { 47 bt.Set(i, i) 48 } 49 for i := uint64(1); i < N; i++ { 50 require.Equal(t, i, bt.Get(i)) 51 } 52 53 bt.DeleteBelow(100) 54 for i := uint64(1); i < 100; i++ { 55 require.Equal(t, uint64(0), bt.Get(i)) 56 } 57 for i := uint64(100); i < N; i++ { 58 require.Equal(t, i, bt.Get(i)) 59 } 60 } 61 62 func TestTreePersistent(t *testing.T) { 63 path := filepath.Join(t.TempDir(), "tree.buf") 64 65 // Create a tree and validate the data. 66 bt1, err := NewTreePersistent(path) 67 require.NoError(t, err) 68 N := uint64(64 << 10) 69 for i := uint64(1); i < N; i++ { 70 bt1.Set(i, i*2) 71 } 72 for i := uint64(1); i < N; i++ { 73 require.Equal(t, i*2, bt1.Get(i)) 74 } 75 bt1Stats := bt1.Stats() 76 require.NoError(t, bt1.Close()) 77 78 // Reopen tree and validate the data. 79 bt2, err := NewTreePersistent(path) 80 require.NoError(t, err) 81 require.Equal(t, bt2.freePage, bt1.freePage) 82 require.Equal(t, bt2.nextPage, bt1.nextPage) 83 bt2Stats := bt2.Stats() 84 // When reopening a tree, the allocated size becomes the file size. 85 // We don't need to compare this, it doesn't change anything in the tree. 86 bt2Stats.Allocated = bt1Stats.Allocated 87 require.Equal(t, bt1Stats, bt2Stats) 88 for i := uint64(1); i < N; i++ { 89 require.Equal(t, i*2, bt2.Get(i)) 90 } 91 // Delete all the data. This will change the value of bt.freePage. 92 bt2.DeleteBelow(math.MaxUint64) 93 bt2Stats = bt2.Stats() 94 require.NoError(t, bt2.Close()) 95 96 // Reopen tree and validate the data. 97 bt3, err := NewTreePersistent(path) 98 require.NoError(t, err) 99 require.Equal(t, bt2.freePage, bt3.freePage) 100 require.Equal(t, bt2.nextPage, bt3.nextPage) 101 bt3Stats := bt3.Stats() 102 bt3Stats.Allocated = bt2Stats.Allocated 103 require.Equal(t, bt2Stats, bt3Stats) 104 require.NoError(t, bt3.Close()) 105 } 106 107 func TestTreeBasic(t *testing.T) { 108 setAndGet := func() { 109 bt := NewTree("TestTreeBasic") 110 defer func() { require.NoError(t, bt.Close()) }() 111 112 N := uint64(1 << 20) 113 mp := make(map[uint64]uint64) 114 for i := uint64(1); i < N; i++ { 115 key := uint64(rand.Int63n(1<<60) + 1) 116 bt.Set(key, key) 117 mp[key] = key 118 } 119 for k, v := range mp { 120 require.Equal(t, v, bt.Get(k)) 121 } 122 123 stats := bt.Stats() 124 t.Logf("final stats: %+v\n", stats) 125 } 126 setAndGet() 127 defer setPageSize(os.Getpagesize()) 128 setPageSize(16 << 5) 129 setAndGet() 130 } 131 132 func TestTreeReset(t *testing.T) { 133 bt := NewTree("TestTreeReset") 134 defer func() { require.NoError(t, bt.Close()) }() 135 136 N := 1 << 10 137 val := rand.Uint64() 138 for i := 0; i < N; i++ { 139 bt.Set(rand.Uint64(), val) 140 } 141 142 // Truncate it to small size that is less than pageSize. 143 bt.Reset() 144 145 stats := bt.Stats() 146 // Verify the tree stats. 147 require.Equal(t, 2, stats.NumPages) 148 require.Equal(t, 1, stats.NumLeafKeys) 149 require.Equal(t, 2*pageSize, stats.Bytes) 150 expectedOcc := float64(1) * 100 / float64(2*maxKeys) 151 require.InDelta(t, expectedOcc, stats.Occupancy, 0.01) 152 require.Zero(t, stats.NumPagesFree) 153 // Check if we can reinsert the data. 154 mp := make(map[uint64]uint64) 155 for i := 0; i < N; i++ { 156 k := rand.Uint64() 157 mp[k] = val 158 bt.Set(k, val) 159 } 160 for k, v := range mp { 161 require.Equal(t, v, bt.Get(k)) 162 } 163 } 164 165 func TestTreeCycle(t *testing.T) { 166 bt := NewTree("TestTreeCycle") 167 defer func() { require.NoError(t, bt.Close()) }() 168 169 val := uint64(0) 170 for i := 0; i < 16; i++ { 171 for j := 0; j < 1e6+i*1e4; j++ { 172 val += 1 173 bt.Set(rand.Uint64(), val) 174 } 175 before := bt.Stats() 176 bt.DeleteBelow(val - 1e4) 177 after := bt.Stats() 178 t.Logf("Cycle %d Done. Before: %+v -> After: %+v\n", i, before, after) 179 } 180 181 bt.DeleteBelow(val) 182 stats := bt.Stats() 183 t.Logf("stats: %+v\n", stats) 184 require.LessOrEqual(t, stats.Occupancy, 1.0) 185 require.GreaterOrEqual(t, stats.NumPagesFree, int(float64(stats.NumPages)*0.95)) 186 } 187 188 func TestTreeIterateKV(t *testing.T) { 189 bt := NewTree("TestTreeIterateKV") 190 defer func() { require.NoError(t, bt.Close()) }() 191 192 // Set entries: (i, i*10) 193 const n = uint64(1 << 20) 194 for i := uint64(1); i <= n; i++ { 195 bt.Set(i, i*10) 196 } 197 198 // Validate entries: (i, i*10) 199 // Set entries: (i, i*20) 200 count := uint64(0) 201 bt.IterateKV(func(k, v uint64) uint64 { 202 require.Equal(t, k*10, v) 203 count++ 204 return k * 20 205 }) 206 require.Equal(t, n, count) 207 208 // Validate entries: (i, i*20) 209 count = uint64(0) 210 bt.IterateKV(func(k, v uint64) uint64 { 211 require.Equal(t, k*20, v) 212 count++ 213 return 0 214 }) 215 require.Equal(t, n, count) 216 } 217 218 func TestOccupancyRatio(t *testing.T) { 219 // atmax 4 keys per node 220 setPageSize(16 * 5) 221 defer setPageSize(os.Getpagesize()) 222 require.Equal(t, 4, maxKeys) 223 224 bt := NewTree("TestOccupancyRatio") 225 defer func() { require.NoError(t, bt.Close()) }() 226 227 expectedRatio := float64(1) * 100 / float64(2*maxKeys) // 2 because we'll have 2 pages. 228 stats := bt.Stats() 229 t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats) 230 require.InDelta(t, expectedRatio, stats.Occupancy, 0.01) 231 for i := uint64(1); i <= 3; i++ { 232 bt.Set(i, i) 233 } 234 // Tree structure will be: 235 // [2,Max,_,_] 236 // [1,2,_,_] [3,Max,_,_] 237 expectedRatio = float64(4) * 100 / float64(3*maxKeys) 238 stats = bt.Stats() 239 t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats) 240 require.InDelta(t, expectedRatio, stats.Occupancy, 0.01) 241 bt.DeleteBelow(2) 242 // Tree structure will be: 243 // [2,Max,_] 244 // [2,_,_,_] [3,Max,_,_] 245 expectedRatio = float64(3) * 100 / float64(3*maxKeys) 246 stats = bt.Stats() 247 t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats) 248 require.InDelta(t, expectedRatio, stats.Occupancy, 0.01) 249 } 250 251 func TestNode(t *testing.T) { 252 n := getNode(make([]byte, pageSize)) 253 for i := uint64(1); i < 16; i *= 2 { 254 n.set(i, i) 255 } 256 n.print(0) 257 require.True(t, 0 == n.get(5)) 258 n.set(5, 5) 259 n.print(0) 260 261 n.setBit(0) 262 require.False(t, n.isLeaf()) 263 n.setBit(bitLeaf) 264 require.True(t, n.isLeaf()) 265 } 266 267 func TestNodeBasic(t *testing.T) { 268 n := getNode(make([]byte, pageSize)) 269 N := uint64(256) 270 mp := make(map[uint64]uint64) 271 for i := uint64(1); i < N; i++ { 272 key := uint64(rand.Int63n(1<<60) + 1) 273 n.set(key, key) 274 mp[key] = key 275 } 276 for k, v := range mp { 277 require.Equal(t, v, n.get(k)) 278 } 279 } 280 281 func TestNode_MoveRight(t *testing.T) { 282 n := getNode(make([]byte, pageSize)) 283 N := uint64(10) 284 for i := uint64(1); i < N; i++ { 285 n.set(i, i) 286 } 287 n.moveRight(5) 288 n.iterate(func(n node, i int) { 289 if i < 5 { 290 require.Equal(t, uint64(i+1), n.key(i)) 291 require.Equal(t, uint64(i+1), n.val(i)) 292 } else if i > 5 { 293 require.Equal(t, uint64(i), n.key(i)) 294 require.Equal(t, uint64(i), n.val(i)) 295 } 296 }) 297 } 298 299 func TestNodeCompact(t *testing.T) { 300 n := getNode(make([]byte, pageSize)) 301 n.setBit(bitLeaf) 302 N := uint64(128) 303 mp := make(map[uint64]uint64) 304 for i := uint64(1); i < N; i++ { 305 key := i 306 val := uint64(10) 307 if i%2 == 0 { 308 val = 20 309 mp[key] = 20 310 } 311 n.set(key, val) 312 } 313 314 require.Equal(t, int(N/2), n.compact(11)) 315 for k, v := range mp { 316 require.Equal(t, v, n.get(k)) 317 } 318 require.Equal(t, uint64(127), n.maxKey()) 319 } 320 321 func BenchmarkPurge(b *testing.B) { 322 N := 16 << 20 323 b.Run("go-mem", func(b *testing.B) { 324 m := make(map[uint64]uint64) 325 for i := 0; i < N; i++ { 326 m[rand.Uint64()] = uint64(i) 327 } 328 }) 329 330 b.Run("btree", func(b *testing.B) { 331 start := time.Now() 332 bt := NewTree("BenchmarkPurge") 333 defer func() { require.NoError(b, bt.Close()) }() 334 for i := 0; i < N; i++ { 335 bt.Set(rand.Uint64(), uint64(i)) 336 } 337 b.Logf("Populate took: %s. stats: %+v\n", time.Since(start), bt.Stats()) 338 339 start = time.Now() 340 before := bt.Stats() 341 bt.DeleteBelow(uint64(N - 1<<20)) 342 after := bt.Stats() 343 b.Logf("Purge took: %s. Before: %+v After: %+v\n", time.Since(start), before, after) 344 }) 345 } 346 347 func BenchmarkWrite(b *testing.B) { 348 b.Run("map", func(b *testing.B) { 349 mp := make(map[uint64]uint64) 350 for n := 0; n < b.N; n++ { 351 k := rand.Uint64() 352 mp[k] = k 353 } 354 }) 355 b.Run("btree", func(b *testing.B) { 356 bt := NewTree("BenchmarkWrite") 357 defer func() { require.NoError(b, bt.Close()) }() 358 b.ResetTimer() 359 for n := 0; n < b.N; n++ { 360 k := rand.Uint64() 361 bt.Set(k, k) 362 } 363 }) 364 } 365 366 // goos: linux 367 // goarch: amd64 368 // pkg: github.com/etecs-ru/ristretto/z 369 // BenchmarkRead/map-4 10845322 109 ns/op 370 // BenchmarkRead/btree-4 2744283 430 ns/op 371 // Cumulative for 10 runs. 372 // name time/op 373 // Read/map-4 105ns ± 1% 374 // Read/btree-4 422ns ± 1% 375 func BenchmarkRead(b *testing.B) { 376 N := 10 << 20 377 mp := make(map[uint64]uint64) 378 for i := 0; i < N; i++ { 379 k := uint64(rand.Intn(2*N)) + 1 380 mp[k] = k 381 } 382 b.Run("map", func(b *testing.B) { 383 for i := 0; i < b.N; i++ { 384 k := uint64(rand.Intn(2 * N)) 385 v, ok := mp[k] 386 _, _ = v, ok 387 } 388 }) 389 390 bt := NewTree("BenchmarkRead") 391 defer func() { require.NoError(b, bt.Close()) }() 392 for i := 0; i < N; i++ { 393 k := uint64(rand.Intn(2*N)) + 1 394 bt.Set(k, k) 395 } 396 stats := bt.Stats() 397 fmt.Printf("Num pages: %d Size: %s\n", stats.NumPages, 398 humanize.IBytes(uint64(stats.Bytes))) 399 fmt.Println("Writes done.") 400 401 b.Run("btree", func(b *testing.B) { 402 for i := 0; i < b.N; i++ { 403 k := uint64(rand.Intn(2*N)) + 1 404 v := bt.Get(k) 405 _ = v 406 } 407 }) 408 } 409 410 func BenchmarkSearch(b *testing.B) { 411 linear := func(n node, k uint64, N int) int { 412 for i := 0; i < N; i++ { 413 if ki := n.key(i); ki >= k { 414 return i 415 } 416 } 417 return N 418 } 419 binary := func(n node, k uint64, N int) int { 420 return sort.Search(N, func(i int) bool { 421 return n.key(i) >= k 422 }) 423 } 424 unroll4 := func(n node, k uint64, N int) int { 425 if len(n[:2*N]) < 8 { 426 for i := 0; i < N; i++ { 427 if ki := n.key(i); ki >= k { 428 return i 429 } 430 } 431 return N 432 } 433 return int(simd.Search(n[:2*N], k)) 434 } 435 436 jumpBy := []int{8, 16, 32, 64, 128, 196, 255} 437 tempDir := b.TempDir() 438 for _, sz := range jumpBy { 439 f, err := os.CreateTemp(tempDir, "tree") 440 require.NoError(b, err) 441 442 mf, err := OpenMmapFileUsing(f, pageSize, true) 443 if err != ErrNewFileCreateFailed { 444 require.NoError(b, err) 445 } 446 447 n := getNode(mf.Data) 448 for i := 1; i <= sz; i++ { 449 n.set(uint64(i), uint64(i)) 450 } 451 452 b.Run(fmt.Sprintf("linear-%d", sz), func(b *testing.B) { 453 for i := 0; i < b.N; i++ { 454 tmp = linear(n, math.MaxUint64, sz) 455 } 456 }) 457 b.Run(fmt.Sprintf("binary-%d", sz), func(b *testing.B) { 458 for i := 0; i < b.N; i++ { 459 tmp = binary(n, uint64(sz), sz) 460 } 461 }) 462 b.Run(fmt.Sprintf("unrolled-asm-%d", sz), func(b *testing.B) { 463 for i := 0; i < b.N; i++ { 464 tmp = unroll4(n, math.MaxUint64, sz) 465 } 466 }) 467 mf.Close(0) 468 os.Remove(f.Name()) 469 } 470 } 471 472 // This benchmark when run on dgus-delta, performed marginally better with threshold=32. 473 // CustomSearch/sz-64_th-1-4 49.9ns ± 1% (fully binary) 474 // CustomSearch/sz-64_th-16-4 63.3ns ± 0% 475 // CustomSearch/sz-64_th-32-4 58.7ns ± 7% 476 // CustomSearch/sz-64_th-64-4 63.9ns ± 7% (fully linear) 477 478 // CustomSearch/sz-128_th-32-4 70.2ns ± 1% 479 480 // CustomSearch/sz-255_th-1-4 77.3ns ± 0% (fully binary) 481 // CustomSearch/sz-255_th-16-4 68.2ns ± 1% 482 // CustomSearch/sz-255_th-32-4 67.0ns ± 7% 483 // CustomSearch/sz-255_th-64-4 85.5ns ±19% 484 // CustomSearch/sz-255_th-256-4 129ns ± 6% (fully linear) 485 486 func BenchmarkCustomSearch(b *testing.B) { 487 mixed := func(n node, k uint64, N int, threshold int) int { 488 lo, hi := 0, N 489 // Reduce the search space using binary search and then do linear search. 490 for hi-lo > threshold { 491 mid := (hi + lo) / 2 492 km := n.key(mid) 493 if k == km { 494 return mid 495 } 496 if k > km { 497 // key is greater than the key at mid, so move right. 498 lo = mid + 1 499 } else { 500 // else move left. 501 hi = mid 502 } 503 } 504 for i := lo; i <= hi; i++ { 505 if ki := n.key(i); ki >= k { 506 return i 507 } 508 } 509 return N 510 } 511 512 for _, sz := range []int{64, 128, 255} { 513 n := getNode(make([]byte, pageSize)) 514 for i := 1; i <= sz; i++ { 515 n.set(uint64(i), uint64(i)) 516 } 517 518 mk := sz + 1 519 for th := 1; th <= sz+1; th *= 2 { 520 b.Run(fmt.Sprintf("sz-%d th-%d", sz, th), func(b *testing.B) { 521 for i := 0; i < b.N; i++ { 522 k := uint64(rand.Intn(mk)) 523 tmp = mixed(n, k, sz, th) 524 } 525 }) 526 } 527 } 528 }