github.com/outcaste-io/ristretto@v0.2.3/z/btree_test.go (about)

     1  /*
     2   * Copyright 2020 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package z
    18  
    19  import (
    20  	"fmt"
    21  	"io/ioutil"
    22  	"math"
    23  	"math/rand"
    24  	"os"
    25  	"path/filepath"
    26  	"sort"
    27  	"testing"
    28  	"time"
    29  
    30  	"github.com/outcaste-io/ristretto/z/simd"
    31  	"github.com/dustin/go-humanize"
    32  	"github.com/stretchr/testify/require"
    33  )
    34  
// tmp receives the results of the search benchmarks in this file; presumably it
// exists so the compiler cannot discard those calls as dead code.
var tmp int
    36  
    37  func setPageSize(sz int) {
    38  	pageSize = sz
    39  	maxKeys = (pageSize / 16) - 1
    40  }
    41  
    42  func TestTree(t *testing.T) {
    43  	bt := NewTree("TestTree")
    44  	defer func() { require.NoError(t, bt.Close()) }()
    45  
    46  	N := uint64(256 * 256)
    47  	for i := uint64(1); i < N; i++ {
    48  		bt.Set(i, i)
    49  	}
    50  	for i := uint64(1); i < N; i++ {
    51  		require.Equal(t, i, bt.Get(i))
    52  	}
    53  
    54  	bt.DeleteBelow(100)
    55  	for i := uint64(1); i < 100; i++ {
    56  		require.Equal(t, uint64(0), bt.Get(i))
    57  	}
    58  	for i := uint64(100); i < N; i++ {
    59  		require.Equal(t, i, bt.Get(i))
    60  	}
    61  }
    62  
    63  func TestTreePersistent(t *testing.T) {
    64  	dir, err := ioutil.TempDir("", "")
    65  	require.NoError(t, err)
    66  	defer os.RemoveAll(dir)
    67  	path := filepath.Join(dir, "tree.buf")
    68  
    69  	// Create a tree and validate the data.
    70  	bt1, err := NewTreePersistent(path)
    71  	require.NoError(t, err)
    72  	N := uint64(64 << 10)
    73  	for i := uint64(1); i < N; i++ {
    74  		bt1.Set(i, i*2)
    75  	}
    76  	for i := uint64(1); i < N; i++ {
    77  		require.Equal(t, i*2, bt1.Get(i))
    78  	}
    79  	bt1Stats := bt1.Stats()
    80  	require.NoError(t, bt1.Close())
    81  
    82  	// Reopen tree and validate the data.
    83  	bt2, err := NewTreePersistent(path)
    84  	require.NoError(t, err)
    85  	require.Equal(t, bt2.freePage, bt1.freePage)
    86  	require.Equal(t, bt2.nextPage, bt1.nextPage)
    87  	bt2Stats := bt2.Stats()
    88  	// When reopening a tree, the allocated size becomes the file size.
    89  	// We don't need to compare this, it doesn't change anything in the tree.
    90  	bt2Stats.Allocated = bt1Stats.Allocated
    91  	require.Equal(t, bt1Stats, bt2Stats)
    92  	for i := uint64(1); i < N; i++ {
    93  		require.Equal(t, i*2, bt2.Get(i))
    94  	}
    95  	// Delete all the data. This will change the value of bt.freePage.
    96  	bt2.DeleteBelow(math.MaxUint64)
    97  	bt2Stats = bt2.Stats()
    98  	require.NoError(t, bt2.Close())
    99  
   100  	// Reopen tree and validate the data.
   101  	bt3, err := NewTreePersistent(path)
   102  	require.NoError(t, err)
   103  	require.Equal(t, bt2.freePage, bt3.freePage)
   104  	require.Equal(t, bt2.nextPage, bt3.nextPage)
   105  	bt3Stats := bt3.Stats()
   106  	bt3Stats.Allocated = bt2Stats.Allocated
   107  	require.Equal(t, bt2Stats, bt3Stats)
   108  	require.NoError(t, bt3.Close())
   109  }
   110  
   111  func TestTreeBasic(t *testing.T) {
   112  	setAndGet := func() {
   113  		bt := NewTree("TestTreeBasic")
   114  		defer func() { require.NoError(t, bt.Close()) }()
   115  
   116  		N := uint64(1 << 20)
   117  		mp := make(map[uint64]uint64)
   118  		for i := uint64(1); i < N; i++ {
   119  			key := uint64(rand.Int63n(1<<60) + 1)
   120  			bt.Set(key, key)
   121  			mp[key] = key
   122  		}
   123  		for k, v := range mp {
   124  			require.Equal(t, v, bt.Get(k))
   125  		}
   126  
   127  		stats := bt.Stats()
   128  		t.Logf("final stats: %+v\n", stats)
   129  	}
   130  	setAndGet()
   131  	defer setPageSize(os.Getpagesize())
   132  	setPageSize(16 << 5)
   133  	setAndGet()
   134  }
   135  
   136  func TestTreeReset(t *testing.T) {
   137  	bt := NewTree("TestTreeReset")
   138  	defer func() { require.NoError(t, bt.Close()) }()
   139  
   140  	N := 1 << 10
   141  	val := rand.Uint64()
   142  	for i := 0; i < N; i++ {
   143  		bt.Set(rand.Uint64(), val)
   144  	}
   145  
   146  	// Truncate it to small size that is less than pageSize.
   147  	bt.Reset()
   148  
   149  	stats := bt.Stats()
   150  	// Verify the tree stats.
   151  	require.Equal(t, 2, stats.NumPages)
   152  	require.Equal(t, 1, stats.NumLeafKeys)
   153  	require.Equal(t, 2*pageSize, stats.Bytes)
   154  	expectedOcc := float64(1) * 100 / float64(2*maxKeys)
   155  	require.InDelta(t, expectedOcc, stats.Occupancy, 0.01)
   156  	require.Zero(t, stats.NumPagesFree)
   157  	// Check if we can reinsert the data.
   158  	mp := make(map[uint64]uint64)
   159  	for i := 0; i < N; i++ {
   160  		k := rand.Uint64()
   161  		mp[k] = val
   162  		bt.Set(k, val)
   163  	}
   164  	for k, v := range mp {
   165  		require.Equal(t, v, bt.Get(k))
   166  	}
   167  }
   168  
   169  func TestTreeCycle(t *testing.T) {
   170  	bt := NewTree("TestTreeCycle")
   171  	defer func() { require.NoError(t, bt.Close()) }()
   172  
   173  	val := uint64(0)
   174  	for i := 0; i < 16; i++ {
   175  		for j := 0; j < 1e6+i*1e4; j++ {
   176  			val += 1
   177  			bt.Set(rand.Uint64(), val)
   178  		}
   179  		before := bt.Stats()
   180  		bt.DeleteBelow(val - 1e4)
   181  		after := bt.Stats()
   182  		t.Logf("Cycle %d Done. Before: %+v -> After: %+v\n", i, before, after)
   183  	}
   184  
   185  	bt.DeleteBelow(val)
   186  	stats := bt.Stats()
   187  	t.Logf("stats: %+v\n", stats)
   188  	require.LessOrEqual(t, stats.Occupancy, 1.0)
   189  	require.GreaterOrEqual(t, stats.NumPagesFree, int(float64(stats.NumPages)*0.95))
   190  }
   191  
   192  func TestTreeIterateKV(t *testing.T) {
   193  	bt := NewTree("TestTreeIterateKV")
   194  	defer func() { require.NoError(t, bt.Close()) }()
   195  
   196  	// Set entries: (i, i*10)
   197  	const n = uint64(1 << 20)
   198  	for i := uint64(1); i <= n; i++ {
   199  		bt.Set(i, i*10)
   200  	}
   201  
   202  	// Validate entries: (i, i*10)
   203  	// Set entries: (i, i*20)
   204  	count := uint64(0)
   205  	bt.IterateKV(func(k, v uint64) uint64 {
   206  		require.Equal(t, k*10, v)
   207  		count++
   208  		return k * 20
   209  	})
   210  	require.Equal(t, n, count)
   211  
   212  	// Validate entries: (i, i*20)
   213  	count = uint64(0)
   214  	bt.IterateKV(func(k, v uint64) uint64 {
   215  		require.Equal(t, k*20, v)
   216  		count++
   217  		return 0
   218  	})
   219  	require.Equal(t, n, count)
   220  }
   221  
   222  func TestOccupancyRatio(t *testing.T) {
   223  	// atmax 4 keys per node
   224  	setPageSize(16 * 5)
   225  	defer setPageSize(os.Getpagesize())
   226  	require.Equal(t, 4, maxKeys)
   227  
   228  	bt := NewTree("TestOccupancyRatio")
   229  	defer func() { require.NoError(t, bt.Close()) }()
   230  
   231  	expectedRatio := float64(1) * 100 / float64(2*maxKeys) // 2 because we'll have 2 pages.
   232  	stats := bt.Stats()
   233  	t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats)
   234  	require.InDelta(t, expectedRatio, stats.Occupancy, 0.01)
   235  	for i := uint64(1); i <= 3; i++ {
   236  		bt.Set(i, i)
   237  	}
   238  	// Tree structure will be:
   239  	//    [2,Max,_,_]
   240  	//  [1,2,_,_]  [3,Max,_,_]
   241  	expectedRatio = float64(4) * 100 / float64(3*maxKeys)
   242  	stats = bt.Stats()
   243  	t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats)
   244  	require.InDelta(t, expectedRatio, stats.Occupancy, 0.01)
   245  	bt.DeleteBelow(2)
   246  	// Tree structure will be:
   247  	//    [2,Max,_]
   248  	//  [2,_,_,_]  [3,Max,_,_]
   249  	expectedRatio = float64(3) * 100 / float64(3*maxKeys)
   250  	stats = bt.Stats()
   251  	t.Logf("Expected ratio: %.2f. MaxKeys: %d. Stats: %+v\n", expectedRatio, maxKeys, stats)
   252  	require.InDelta(t, expectedRatio, stats.Occupancy, 0.01)
   253  }
   254  
   255  func TestNode(t *testing.T) {
   256  	n := getNode(make([]byte, pageSize))
   257  	for i := uint64(1); i < 16; i *= 2 {
   258  		n.set(i, i)
   259  	}
   260  	n.print(0)
   261  	require.True(t, 0 == n.get(5))
   262  	n.set(5, 5)
   263  	n.print(0)
   264  
   265  	n.setBit(0)
   266  	require.False(t, n.isLeaf())
   267  	n.setBit(bitLeaf)
   268  	require.True(t, n.isLeaf())
   269  }
   270  
   271  func TestNodeBasic(t *testing.T) {
   272  	n := getNode(make([]byte, pageSize))
   273  	N := uint64(256)
   274  	mp := make(map[uint64]uint64)
   275  	for i := uint64(1); i < N; i++ {
   276  		key := uint64(rand.Int63n(1<<60) + 1)
   277  		n.set(key, key)
   278  		mp[key] = key
   279  	}
   280  	for k, v := range mp {
   281  		require.Equal(t, v, n.get(k))
   282  	}
   283  }
   284  
   285  func TestNode_MoveRight(t *testing.T) {
   286  	n := getNode(make([]byte, pageSize))
   287  	N := uint64(10)
   288  	for i := uint64(1); i < N; i++ {
   289  		n.set(i, i)
   290  	}
   291  	n.moveRight(5)
   292  	n.iterate(func(n node, i int) {
   293  		if i < 5 {
   294  			require.Equal(t, uint64(i+1), n.key(i))
   295  			require.Equal(t, uint64(i+1), n.val(i))
   296  		} else if i > 5 {
   297  			require.Equal(t, uint64(i), n.key(i))
   298  			require.Equal(t, uint64(i), n.val(i))
   299  		}
   300  	})
   301  }
   302  
   303  func TestNodeCompact(t *testing.T) {
   304  	n := getNode(make([]byte, pageSize))
   305  	n.setBit(bitLeaf)
   306  	N := uint64(128)
   307  	mp := make(map[uint64]uint64)
   308  	for i := uint64(1); i < N; i++ {
   309  		key := i
   310  		val := uint64(10)
   311  		if i%2 == 0 {
   312  			val = 20
   313  			mp[key] = 20
   314  		}
   315  		n.set(key, val)
   316  	}
   317  
   318  	require.Equal(t, int(N/2), n.compact(11))
   319  	for k, v := range mp {
   320  		require.Equal(t, v, n.get(k))
   321  	}
   322  	require.Equal(t, uint64(127), n.maxKey())
   323  }
   324  
   325  func BenchmarkPurge(b *testing.B) {
   326  	N := 16 << 20
   327  	b.Run("go-mem", func(b *testing.B) {
   328  		m := make(map[uint64]uint64)
   329  		for i := 0; i < N; i++ {
   330  			m[rand.Uint64()] = uint64(i)
   331  		}
   332  	})
   333  
   334  	b.Run("btree", func(b *testing.B) {
   335  		start := time.Now()
   336  		bt := NewTree("BenchmarkPurge")
   337  		defer func() { require.NoError(b, bt.Close()) }()
   338  		for i := 0; i < N; i++ {
   339  			bt.Set(rand.Uint64(), uint64(i))
   340  		}
   341  		b.Logf("Populate took: %s. stats: %+v\n", time.Since(start), bt.Stats())
   342  
   343  		start = time.Now()
   344  		before := bt.Stats()
   345  		bt.DeleteBelow(uint64(N - 1<<20))
   346  		after := bt.Stats()
   347  		b.Logf("Purge took: %s. Before: %+v After: %+v\n", time.Since(start), before, after)
   348  	})
   349  }
   350  
   351  func BenchmarkWrite(b *testing.B) {
   352  	b.Run("map", func(b *testing.B) {
   353  		mp := make(map[uint64]uint64)
   354  		for n := 0; n < b.N; n++ {
   355  			k := rand.Uint64()
   356  			mp[k] = k
   357  		}
   358  	})
   359  	b.Run("btree", func(b *testing.B) {
   360  		bt := NewTree("BenchmarkWrite")
   361  		defer func() { require.NoError(b, bt.Close()) }()
   362  		b.ResetTimer()
   363  		for n := 0; n < b.N; n++ {
   364  			k := rand.Uint64()
   365  			bt.Set(k, k)
   366  		}
   367  	})
   368  }
   369  
   370  // goos: linux
   371  // goarch: amd64
   372  // pkg: github.com/outcaste-io/ristretto/z
   373  // BenchmarkRead/map-4         	10845322	       109 ns/op
   374  // BenchmarkRead/btree-4       	 2744283	       430 ns/op
   375  // Cumulative for 10 runs.
   376  // name          time/op
   377  // Read/map-4    105ns ± 1%
   378  // Read/btree-4  422ns ± 1%
   379  func BenchmarkRead(b *testing.B) {
   380  	N := 10 << 20
   381  	mp := make(map[uint64]uint64)
   382  	for i := 0; i < N; i++ {
   383  		k := uint64(rand.Intn(2*N)) + 1
   384  		mp[k] = k
   385  	}
   386  	b.Run("map", func(b *testing.B) {
   387  		for i := 0; i < b.N; i++ {
   388  			k := uint64(rand.Intn(2 * N))
   389  			v, ok := mp[k]
   390  			_, _ = v, ok
   391  		}
   392  	})
   393  
   394  	bt := NewTree("BenchmarkRead")
   395  	defer func() { require.NoError(b, bt.Close()) }()
   396  	for i := 0; i < N; i++ {
   397  		k := uint64(rand.Intn(2*N)) + 1
   398  		bt.Set(k, k)
   399  	}
   400  	stats := bt.Stats()
   401  	fmt.Printf("Num pages: %d Size: %s\n", stats.NumPages,
   402  		humanize.IBytes(uint64(stats.Bytes)))
   403  	fmt.Println("Writes done.")
   404  
   405  	b.Run("btree", func(b *testing.B) {
   406  		for i := 0; i < b.N; i++ {
   407  			k := uint64(rand.Intn(2*N)) + 1
   408  			v := bt.Get(k)
   409  			_ = v
   410  		}
   411  	})
   412  }
   413  
   414  func BenchmarkSearch(b *testing.B) {
   415  	linear := func(n node, k uint64, N int) int {
   416  		for i := 0; i < N; i++ {
   417  			if ki := n.key(i); ki >= k {
   418  				return i
   419  			}
   420  		}
   421  		return N
   422  	}
   423  	binary := func(n node, k uint64, N int) int {
   424  		return sort.Search(N, func(i int) bool {
   425  			return n.key(i) >= k
   426  		})
   427  	}
   428  	unroll4 := func(n node, k uint64, N int) int {
   429  		if len(n[:2*N]) < 8 {
   430  			for i := 0; i < N; i++ {
   431  				if ki := n.key(i); ki >= k {
   432  					return i
   433  				}
   434  			}
   435  			return N
   436  		}
   437  		return int(simd.Search(n[:2*N], k))
   438  	}
   439  
   440  	jumpBy := []int{8, 16, 32, 64, 128, 196, 255}
   441  	for _, sz := range jumpBy {
   442  		f, err := ioutil.TempFile(".", "tree")
   443  		require.NoError(b, err)
   444  
   445  		mf, err := OpenMmapFileUsing(f, pageSize, true)
   446  		if err != NewFile {
   447  			require.NoError(b, err)
   448  		}
   449  
   450  		n := getNode(mf.Data)
   451  		for i := 1; i <= sz; i++ {
   452  			n.set(uint64(i), uint64(i))
   453  		}
   454  
   455  		b.Run(fmt.Sprintf("linear-%d", sz), func(b *testing.B) {
   456  			for i := 0; i < b.N; i++ {
   457  				tmp = linear(n, math.MaxUint64, sz)
   458  			}
   459  		})
   460  		b.Run(fmt.Sprintf("binary-%d", sz), func(b *testing.B) {
   461  			for i := 0; i < b.N; i++ {
   462  				tmp = binary(n, uint64(sz), sz)
   463  			}
   464  		})
   465  		b.Run(fmt.Sprintf("unrolled-asm-%d", sz), func(b *testing.B) {
   466  			for i := 0; i < b.N; i++ {
   467  				tmp = unroll4(n, math.MaxUint64, sz)
   468  			}
   469  		})
   470  		mf.Close(0)
   471  		os.Remove(f.Name())
   472  	}
   473  }
   474  
   475  // This benchmark when run on dgus-delta, performed marginally better with threshold=32.
   476  // CustomSearch/sz-64_th-1-4     49.9ns ± 1% (fully binary)
   477  // CustomSearch/sz-64_th-16-4    63.3ns ± 0%
   478  // CustomSearch/sz-64_th-32-4    58.7ns ± 7%
   479  // CustomSearch/sz-64_th-64-4    63.9ns ± 7% (fully linear)
   480  
   481  // CustomSearch/sz-128_th-32-4   70.2ns ± 1%
   482  
   483  // CustomSearch/sz-255_th-1-4    77.3ns ± 0% (fully binary)
   484  // CustomSearch/sz-255_th-16-4   68.2ns ± 1%
   485  // CustomSearch/sz-255_th-32-4   67.0ns ± 7%
   486  // CustomSearch/sz-255_th-64-4   85.5ns ±19%
   487  // CustomSearch/sz-255_th-256-4   129ns ± 6% (fully linear)
   488  
   489  func BenchmarkCustomSearch(b *testing.B) {
   490  	mixed := func(n node, k uint64, N int, threshold int) int {
   491  		lo, hi := 0, N
   492  		// Reduce the search space using binary seach and then do linear search.
   493  		for hi-lo > threshold {
   494  			mid := (hi + lo) / 2
   495  			km := n.key(mid)
   496  			if k == km {
   497  				return mid
   498  			}
   499  			if k > km {
   500  				// key is greater than the key at mid, so move right.
   501  				lo = mid + 1
   502  			} else {
   503  				// else move left.
   504  				hi = mid
   505  			}
   506  		}
   507  		for i := lo; i <= hi; i++ {
   508  			if ki := n.key(i); ki >= k {
   509  				return i
   510  			}
   511  		}
   512  		return N
   513  	}
   514  
   515  	for _, sz := range []int{64, 128, 255} {
   516  		n := getNode(make([]byte, pageSize))
   517  		for i := 1; i <= sz; i++ {
   518  			n.set(uint64(i), uint64(i))
   519  		}
   520  
   521  		mk := sz + 1
   522  		for th := 1; th <= sz+1; th *= 2 {
   523  			b.Run(fmt.Sprintf("sz-%d th-%d", sz, th), func(b *testing.B) {
   524  				for i := 0; i < b.N; i++ {
   525  					k := uint64(rand.Intn(mk))
   526  					tmp = mixed(n, k, sz, th)
   527  				}
   528  			})
   529  		}
   530  	}
   531  }