github.com/etecs-ru/ristretto@v0.9.1/z/btree_test.go (about)

     1  /*
     2   * Copyright 2020 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package z
    18  
    19  import (
    20  	"fmt"
    21  	"math"
    22  	"math/rand"
    23  	"os"
    24  	"path/filepath"
    25  	"sort"
    26  	"testing"
    27  	"time"
    28  
    29  	"github.com/dustin/go-humanize"
    30  	"github.com/etecs-ru/ristretto/z/simd"
    31  	"github.com/stretchr/testify/require"
    32  )
    33  
// tmp receives the search results computed inside BenchmarkSearch and
// BenchmarkCustomSearch, presumably so that the compiler cannot discard the
// benchmarked calls as dead code.
var tmp int
    35  
    36  func setPageSize(sz int) {
    37  	pageSize = sz
    38  	maxKeys = (pageSize / 16) - 1
    39  }
    40  
    41  func TestTree(t *testing.T) {
    42  	bt := NewTree("TestTree")
    43  	defer func() { require.NoError(t, bt.Close()) }()
    44  
    45  	N := uint64(256 * 256)
    46  	for i := uint64(1); i < N; i++ {
    47  		bt.Set(i, i)
    48  	}
    49  	for i := uint64(1); i < N; i++ {
    50  		require.Equal(t, i, bt.Get(i))
    51  	}
    52  
    53  	bt.DeleteBelow(100)
    54  	for i := uint64(1); i < 100; i++ {
    55  		require.Equal(t, uint64(0), bt.Get(i))
    56  	}
    57  	for i := uint64(100); i < N; i++ {
    58  		require.Equal(t, i, bt.Get(i))
    59  	}
    60  }
    61  
    62  func TestTreePersistent(t *testing.T) {
    63  	path := filepath.Join(t.TempDir(), "tree.buf")
    64  
    65  	// Create a tree and validate the data.
    66  	bt1, err := NewTreePersistent(path)
    67  	require.NoError(t, err)
    68  	N := uint64(64 << 10)
    69  	for i := uint64(1); i < N; i++ {
    70  		bt1.Set(i, i*2)
    71  	}
    72  	for i := uint64(1); i < N; i++ {
    73  		require.Equal(t, i*2, bt1.Get(i))
    74  	}
    75  	bt1Stats := bt1.Stats()
    76  	require.NoError(t, bt1.Close())
    77  
    78  	// Reopen tree and validate the data.
    79  	bt2, err := NewTreePersistent(path)
    80  	require.NoError(t, err)
    81  	require.Equal(t, bt2.freePage, bt1.freePage)
    82  	require.Equal(t, bt2.nextPage, bt1.nextPage)
    83  	bt2Stats := bt2.Stats()
    84  	// When reopening a tree, the allocated size becomes the file size.
    85  	// We don't need to compare this, it doesn't change anything in the tree.
    86  	bt2Stats.Allocated = bt1Stats.Allocated
    87  	require.Equal(t, bt1Stats, bt2Stats)
    88  	for i := uint64(1); i < N; i++ {
    89  		require.Equal(t, i*2, bt2.Get(i))
    90  	}
    91  	// Delete all the data. This will change the value of bt.freePage.
    92  	bt2.DeleteBelow(math.MaxUint64)
    93  	bt2Stats = bt2.Stats()
    94  	require.NoError(t, bt2.Close())
    95  
    96  	// Reopen tree and validate the data.
    97  	bt3, err := NewTreePersistent(path)
    98  	require.NoError(t, err)
    99  	require.Equal(t, bt2.freePage, bt3.freePage)
   100  	require.Equal(t, bt2.nextPage, bt3.nextPage)
   101  	bt3Stats := bt3.Stats()
   102  	bt3Stats.Allocated = bt2Stats.Allocated
   103  	require.Equal(t, bt2Stats, bt3Stats)
   104  	require.NoError(t, bt3.Close())
   105  }
   106  
   107  func TestTreeBasic(t *testing.T) {
   108  	setAndGet := func() {
   109  		bt := NewTree("TestTreeBasic")
   110  		defer func() { require.NoError(t, bt.Close()) }()
   111  
   112  		N := uint64(1 << 20)
   113  		mp := make(map[uint64]uint64)
   114  		for i := uint64(1); i < N; i++ {
   115  			key := uint64(rand.Int63n(1<<60) + 1)
   116  			bt.Set(key, key)
   117  			mp[key] = key
   118  		}
   119  		for k, v := range mp {
   120  			require.Equal(t, v, bt.Get(k))
   121  		}
   122  
   123  		stats := bt.Stats()
   124  		t.Logf("final stats: %+v\n", stats)
   125  	}
   126  	setAndGet()
   127  	defer setPageSize(os.Getpagesize())
   128  	setPageSize(16 << 5)
   129  	setAndGet()
   130  }
   131  
   132  func TestTreeReset(t *testing.T) {
   133  	bt := NewTree("TestTreeReset")
   134  	defer func() { require.NoError(t, bt.Close()) }()
   135  
   136  	N := 1 << 10
   137  	val := rand.Uint64()
   138  	for i := 0; i < N; i++ {
   139  		bt.Set(rand.Uint64(), val)
   140  	}
   141  
   142  	// Truncate it to small size that is less than pageSize.
   143  	bt.Reset()
   144  
   145  	stats := bt.Stats()
   146  	// Verify the tree stats.
   147  	require.Equal(t, 2, stats.NumPages)
   148  	require.Equal(t, 1, stats.NumLeafKeys)
   149  	require.Equal(t, 2*pageSize, stats.Bytes)
   150  	expectedOcc := float64(1) * 100 / float64(2*maxKeys)
   151  	require.InDelta(t, expectedOcc, stats.Occupancy, 0.01)
   152  	require.Zero(t, stats.NumPagesFree)
   153  	// Check if we can reinsert the data.
   154  	mp := make(map[uint64]uint64)
   155  	for i := 0; i < N; i++ {
   156  		k := rand.Uint64()
   157  		mp[k] = val
   158  		bt.Set(k, val)
   159  	}
   160  	for k, v := range mp {
   161  		require.Equal(t, v, bt.Get(k))
   162  	}
   163  }
   164  
   165  func TestTreeCycle(t *testing.T) {
   166  	bt := NewTree("TestTreeCycle")
   167  	defer func() { require.NoError(t, bt.Close()) }()
   168  
   169  	val := uint64(0)
   170  	for i := 0; i < 16; i++ {
   171  		for j := 0; j < 1e6+i*1e4; j++ {
   172  			val += 1
   173  			bt.Set(rand.Uint64(), val)
   174  		}
   175  		before := bt.Stats()
   176  		bt.DeleteBelow(val - 1e4)
   177  		after := bt.Stats()
   178  		t.Logf("Cycle %d Done. Before: %+v -> After: %+v\n", i, before, after)
   179  	}
   180  
   181  	bt.DeleteBelow(val)
   182  	stats := bt.Stats()
   183  	t.Logf("stats: %+v\n", stats)
   184  	require.LessOrEqual(t, stats.Occupancy, 1.0)
   185  	require.GreaterOrEqual(t, stats.NumPagesFree, int(float64(stats.NumPages)*0.95))
   186  }
   187  
   188  func TestTreeIterateKV(t *testing.T) {
   189  	bt := NewTree("TestTreeIterateKV")
   190  	defer func() { require.NoError(t, bt.Close()) }()
   191  
   192  	// Set entries: (i, i*10)
   193  	const n = uint64(1 << 20)
   194  	for i := uint64(1); i <= n; i++ {
   195  		bt.Set(i, i*10)
   196  	}
   197  
   198  	// Validate entries: (i, i*10)
   199  	// Set entries: (i, i*20)
   200  	count := uint64(0)
   201  	bt.IterateKV(func(k, v uint64) uint64 {
   202  		require.Equal(t, k*10, v)
   203  		count++
   204  		return k * 20
   205  	})
   206  	require.Equal(t, n, count)
   207  
   208  	// Validate entries: (i, i*20)
   209  	count = uint64(0)
   210  	bt.IterateKV(func(k, v uint64) uint64 {
   211  		require.Equal(t, k*20, v)
   212  		count++
   213  		return 0
   214  	})
   215  	require.Equal(t, n, count)
   216  }
   217  
   218  func TestOccupancyRatio(t *testing.T) {
   219  	// atmax 4 keys per node
   220  	setPageSize(16 * 5)
   221  	defer setPageSize(os.Getpagesize())
   222  	require.Equal(t, 4, maxKeys)
   223  
   224  	bt := NewTree("TestOccupancyRatio")
   225  	defer func() { require.NoError(t, bt.Close()) }()
   226  
   227  	expectedRatio := float64(1) * 100 / float64(2*maxKeys) // 2 because we'll have 2 pages.
   228  	stats := bt.Stats()
   229  	t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats)
   230  	require.InDelta(t, expectedRatio, stats.Occupancy, 0.01)
   231  	for i := uint64(1); i <= 3; i++ {
   232  		bt.Set(i, i)
   233  	}
   234  	// Tree structure will be:
   235  	//    [2,Max,_,_]
   236  	//  [1,2,_,_]  [3,Max,_,_]
   237  	expectedRatio = float64(4) * 100 / float64(3*maxKeys)
   238  	stats = bt.Stats()
   239  	t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats)
   240  	require.InDelta(t, expectedRatio, stats.Occupancy, 0.01)
   241  	bt.DeleteBelow(2)
   242  	// Tree structure will be:
   243  	//    [2,Max,_]
   244  	//  [2,_,_,_]  [3,Max,_,_]
   245  	expectedRatio = float64(3) * 100 / float64(3*maxKeys)
   246  	stats = bt.Stats()
   247  	t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats)
   248  	require.InDelta(t, expectedRatio, stats.Occupancy, 0.01)
   249  }
   250  
   251  func TestNode(t *testing.T) {
   252  	n := getNode(make([]byte, pageSize))
   253  	for i := uint64(1); i < 16; i *= 2 {
   254  		n.set(i, i)
   255  	}
   256  	n.print(0)
   257  	require.True(t, 0 == n.get(5))
   258  	n.set(5, 5)
   259  	n.print(0)
   260  
   261  	n.setBit(0)
   262  	require.False(t, n.isLeaf())
   263  	n.setBit(bitLeaf)
   264  	require.True(t, n.isLeaf())
   265  }
   266  
   267  func TestNodeBasic(t *testing.T) {
   268  	n := getNode(make([]byte, pageSize))
   269  	N := uint64(256)
   270  	mp := make(map[uint64]uint64)
   271  	for i := uint64(1); i < N; i++ {
   272  		key := uint64(rand.Int63n(1<<60) + 1)
   273  		n.set(key, key)
   274  		mp[key] = key
   275  	}
   276  	for k, v := range mp {
   277  		require.Equal(t, v, n.get(k))
   278  	}
   279  }
   280  
   281  func TestNode_MoveRight(t *testing.T) {
   282  	n := getNode(make([]byte, pageSize))
   283  	N := uint64(10)
   284  	for i := uint64(1); i < N; i++ {
   285  		n.set(i, i)
   286  	}
   287  	n.moveRight(5)
   288  	n.iterate(func(n node, i int) {
   289  		if i < 5 {
   290  			require.Equal(t, uint64(i+1), n.key(i))
   291  			require.Equal(t, uint64(i+1), n.val(i))
   292  		} else if i > 5 {
   293  			require.Equal(t, uint64(i), n.key(i))
   294  			require.Equal(t, uint64(i), n.val(i))
   295  		}
   296  	})
   297  }
   298  
   299  func TestNodeCompact(t *testing.T) {
   300  	n := getNode(make([]byte, pageSize))
   301  	n.setBit(bitLeaf)
   302  	N := uint64(128)
   303  	mp := make(map[uint64]uint64)
   304  	for i := uint64(1); i < N; i++ {
   305  		key := i
   306  		val := uint64(10)
   307  		if i%2 == 0 {
   308  			val = 20
   309  			mp[key] = 20
   310  		}
   311  		n.set(key, val)
   312  	}
   313  
   314  	require.Equal(t, int(N/2), n.compact(11))
   315  	for k, v := range mp {
   316  		require.Equal(t, v, n.get(k))
   317  	}
   318  	require.Equal(t, uint64(127), n.maxKey())
   319  }
   320  
   321  func BenchmarkPurge(b *testing.B) {
   322  	N := 16 << 20
   323  	b.Run("go-mem", func(b *testing.B) {
   324  		m := make(map[uint64]uint64)
   325  		for i := 0; i < N; i++ {
   326  			m[rand.Uint64()] = uint64(i)
   327  		}
   328  	})
   329  
   330  	b.Run("btree", func(b *testing.B) {
   331  		start := time.Now()
   332  		bt := NewTree("BenchmarkPurge")
   333  		defer func() { require.NoError(b, bt.Close()) }()
   334  		for i := 0; i < N; i++ {
   335  			bt.Set(rand.Uint64(), uint64(i))
   336  		}
   337  		b.Logf("Populate took: %s. stats: %+v\n", time.Since(start), bt.Stats())
   338  
   339  		start = time.Now()
   340  		before := bt.Stats()
   341  		bt.DeleteBelow(uint64(N - 1<<20))
   342  		after := bt.Stats()
   343  		b.Logf("Purge took: %s. Before: %+v After: %+v\n", time.Since(start), before, after)
   344  	})
   345  }
   346  
   347  func BenchmarkWrite(b *testing.B) {
   348  	b.Run("map", func(b *testing.B) {
   349  		mp := make(map[uint64]uint64)
   350  		for n := 0; n < b.N; n++ {
   351  			k := rand.Uint64()
   352  			mp[k] = k
   353  		}
   354  	})
   355  	b.Run("btree", func(b *testing.B) {
   356  		bt := NewTree("BenchmarkWrite")
   357  		defer func() { require.NoError(b, bt.Close()) }()
   358  		b.ResetTimer()
   359  		for n := 0; n < b.N; n++ {
   360  			k := rand.Uint64()
   361  			bt.Set(k, k)
   362  		}
   363  	})
   364  }
   365  
   366  // goos: linux
   367  // goarch: amd64
   368  // pkg: github.com/etecs-ru/ristretto/z
   369  // BenchmarkRead/map-4         	10845322	       109 ns/op
   370  // BenchmarkRead/btree-4       	 2744283	       430 ns/op
   371  // Cumulative for 10 runs.
   372  // name          time/op
   373  // Read/map-4    105ns ± 1%
   374  // Read/btree-4  422ns ± 1%
   375  func BenchmarkRead(b *testing.B) {
   376  	N := 10 << 20
   377  	mp := make(map[uint64]uint64)
   378  	for i := 0; i < N; i++ {
   379  		k := uint64(rand.Intn(2*N)) + 1
   380  		mp[k] = k
   381  	}
   382  	b.Run("map", func(b *testing.B) {
   383  		for i := 0; i < b.N; i++ {
   384  			k := uint64(rand.Intn(2 * N))
   385  			v, ok := mp[k]
   386  			_, _ = v, ok
   387  		}
   388  	})
   389  
   390  	bt := NewTree("BenchmarkRead")
   391  	defer func() { require.NoError(b, bt.Close()) }()
   392  	for i := 0; i < N; i++ {
   393  		k := uint64(rand.Intn(2*N)) + 1
   394  		bt.Set(k, k)
   395  	}
   396  	stats := bt.Stats()
   397  	fmt.Printf("Num pages: %d Size: %s\n", stats.NumPages,
   398  		humanize.IBytes(uint64(stats.Bytes)))
   399  	fmt.Println("Writes done.")
   400  
   401  	b.Run("btree", func(b *testing.B) {
   402  		for i := 0; i < b.N; i++ {
   403  			k := uint64(rand.Intn(2*N)) + 1
   404  			v := bt.Get(k)
   405  			_ = v
   406  		}
   407  	})
   408  }
   409  
   410  func BenchmarkSearch(b *testing.B) {
   411  	linear := func(n node, k uint64, N int) int {
   412  		for i := 0; i < N; i++ {
   413  			if ki := n.key(i); ki >= k {
   414  				return i
   415  			}
   416  		}
   417  		return N
   418  	}
   419  	binary := func(n node, k uint64, N int) int {
   420  		return sort.Search(N, func(i int) bool {
   421  			return n.key(i) >= k
   422  		})
   423  	}
   424  	unroll4 := func(n node, k uint64, N int) int {
   425  		if len(n[:2*N]) < 8 {
   426  			for i := 0; i < N; i++ {
   427  				if ki := n.key(i); ki >= k {
   428  					return i
   429  				}
   430  			}
   431  			return N
   432  		}
   433  		return int(simd.Search(n[:2*N], k))
   434  	}
   435  
   436  	jumpBy := []int{8, 16, 32, 64, 128, 196, 255}
   437  	tempDir := b.TempDir()
   438  	for _, sz := range jumpBy {
   439  		f, err := os.CreateTemp(tempDir, "tree")
   440  		require.NoError(b, err)
   441  
   442  		mf, err := OpenMmapFileUsing(f, pageSize, true)
   443  		if err != ErrNewFileCreateFailed {
   444  			require.NoError(b, err)
   445  		}
   446  
   447  		n := getNode(mf.Data)
   448  		for i := 1; i <= sz; i++ {
   449  			n.set(uint64(i), uint64(i))
   450  		}
   451  
   452  		b.Run(fmt.Sprintf("linear-%d", sz), func(b *testing.B) {
   453  			for i := 0; i < b.N; i++ {
   454  				tmp = linear(n, math.MaxUint64, sz)
   455  			}
   456  		})
   457  		b.Run(fmt.Sprintf("binary-%d", sz), func(b *testing.B) {
   458  			for i := 0; i < b.N; i++ {
   459  				tmp = binary(n, uint64(sz), sz)
   460  			}
   461  		})
   462  		b.Run(fmt.Sprintf("unrolled-asm-%d", sz), func(b *testing.B) {
   463  			for i := 0; i < b.N; i++ {
   464  				tmp = unroll4(n, math.MaxUint64, sz)
   465  			}
   466  		})
   467  		mf.Close(0)
   468  		os.Remove(f.Name())
   469  	}
   470  }
   471  
   472  // This benchmark when run on dgus-delta, performed marginally better with threshold=32.
   473  // CustomSearch/sz-64_th-1-4     49.9ns ± 1% (fully binary)
   474  // CustomSearch/sz-64_th-16-4    63.3ns ± 0%
   475  // CustomSearch/sz-64_th-32-4    58.7ns ± 7%
   476  // CustomSearch/sz-64_th-64-4    63.9ns ± 7% (fully linear)
   477  
   478  // CustomSearch/sz-128_th-32-4   70.2ns ± 1%
   479  
   480  // CustomSearch/sz-255_th-1-4    77.3ns ± 0% (fully binary)
   481  // CustomSearch/sz-255_th-16-4   68.2ns ± 1%
   482  // CustomSearch/sz-255_th-32-4   67.0ns ± 7%
   483  // CustomSearch/sz-255_th-64-4   85.5ns ±19%
   484  // CustomSearch/sz-255_th-256-4   129ns ± 6% (fully linear)
   485  
   486  func BenchmarkCustomSearch(b *testing.B) {
   487  	mixed := func(n node, k uint64, N int, threshold int) int {
   488  		lo, hi := 0, N
   489  		// Reduce the search space using binary search and then do linear search.
   490  		for hi-lo > threshold {
   491  			mid := (hi + lo) / 2
   492  			km := n.key(mid)
   493  			if k == km {
   494  				return mid
   495  			}
   496  			if k > km {
   497  				// key is greater than the key at mid, so move right.
   498  				lo = mid + 1
   499  			} else {
   500  				// else move left.
   501  				hi = mid
   502  			}
   503  		}
   504  		for i := lo; i <= hi; i++ {
   505  			if ki := n.key(i); ki >= k {
   506  				return i
   507  			}
   508  		}
   509  		return N
   510  	}
   511  
   512  	for _, sz := range []int{64, 128, 255} {
   513  		n := getNode(make([]byte, pageSize))
   514  		for i := 1; i <= sz; i++ {
   515  			n.set(uint64(i), uint64(i))
   516  		}
   517  
   518  		mk := sz + 1
   519  		for th := 1; th <= sz+1; th *= 2 {
   520  			b.Run(fmt.Sprintf("sz-%d th-%d", sz, th), func(b *testing.B) {
   521  				for i := 0; i < b.N; i++ {
   522  					k := uint64(rand.Intn(mk))
   523  					tmp = mixed(n, k, sz, th)
   524  				}
   525  			})
   526  		}
   527  	}
   528  }