github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/node_test.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tree
    16  
    17  import (
    18  	"context"
    19  	"math"
    20  	"math/rand"
    21  	"testing"
    22  	"unsafe"
    23  
    24  	"github.com/stretchr/testify/assert"
    25  	"github.com/stretchr/testify/require"
    26  
    27  	"github.com/dolthub/dolt/go/gen/fb/serial"
    28  	"github.com/dolthub/dolt/go/store/chunks"
    29  	"github.com/dolthub/dolt/go/store/hash"
    30  	"github.com/dolthub/dolt/go/store/prolly/message"
    31  	"github.com/dolthub/dolt/go/store/types"
    32  	"github.com/dolthub/dolt/go/store/val"
    33  )
    34  
    35  func TestRoundTripInts(t *testing.T) {
    36  	tups, _ := AscendingUintTuples(10)
    37  	keys := make([]val.Tuple, len(tups))
    38  	values := make([]val.Tuple, len(tups))
    39  	for i := range tups {
    40  		keys[i] = tups[i][0]
    41  		values[i] = tups[i][1]
    42  	}
    43  	require.True(t, sumTupleSize(keys)+sumTupleSize(values) < message.MaxVectorOffset)
    44  
    45  	nd := NewTupleLeafNode(keys, values)
    46  	assert.True(t, nd.IsLeaf())
    47  	assert.Equal(t, len(keys), int(nd.count))
    48  	for i := range keys {
    49  		assert.Equal(t, keys[i], val.Tuple(nd.GetKey(i)))
    50  		assert.Equal(t, values[i], val.Tuple(nd.GetValue(i)))
    51  	}
    52  }
    53  
    54  func TestRoundTripNodeItems(t *testing.T) {
    55  	for trial := 0; trial < 100; trial++ {
    56  		keys, values := randomNodeItemPairs(t, (rand.Int()%101)+50)
    57  		require.True(t, sumSize(keys)+sumSize(values) < message.MaxVectorOffset)
    58  
    59  		nd := newLeafNode(keys, values)
    60  		assert.True(t, nd.IsLeaf())
    61  		assert.Equal(t, len(keys), int(nd.count))
    62  		for i := range keys {
    63  			assert.Equal(t, keys[i], nd.GetKey(i))
    64  			assert.Equal(t, values[i], nd.GetValue(i))
    65  		}
    66  	}
    67  }
    68  
    69  func TestNodeSize(t *testing.T) {
    70  	sz := unsafe.Sizeof(Node{})
    71  	assert.Equal(t, 56, int(sz))
    72  }
    73  
    74  func BenchmarkNodeGet(b *testing.B) {
    75  	const (
    76  		count int = 128
    77  		mask  int = 0x7f
    78  	)
    79  	tuples, _ := AscendingUintTuples(count)
    80  	assert.Len(b, tuples, count)
    81  	keys := make([]Item, count)
    82  	vals := make([]Item, count)
    83  	for i := range tuples {
    84  		keys[i] = Item(tuples[i][0])
    85  		vals[i] = Item(tuples[i][1])
    86  	}
    87  	nd := newLeafNode(keys, vals)
    88  
    89  	var pm serial.ProllyTreeNode
    90  	err := serial.InitProllyTreeNodeRoot(&pm, nd.msg, serial.MessagePrefixSz)
    91  	require.NoError(b, err)
    92  	b.ResetTimer()
    93  
    94  	b.Run("ItemAccess Get", func(b *testing.B) {
    95  		var k Item
    96  		for i := 0; i < b.N; i++ {
    97  			k = nd.GetKey(i & mask)
    98  		}
    99  		assert.NotNil(b, k)
   100  	})
   101  	b.Run("Flatbuffers Get", func(b *testing.B) {
   102  		var k Item
   103  		for i := 0; i < b.N; i++ {
   104  			k = flatbuffersGetKey(i&mask, pm)
   105  		}
   106  		assert.NotNil(b, k)
   107  	})
   108  }
   109  
   110  // Node.Get() without cached offset metadata
   111  func flatbuffersGetKey(i int, pm serial.ProllyTreeNode) (key []byte) {
   112  	buf := pm.KeyItemsBytes()
   113  	start := pm.KeyOffsets(i)
   114  	stop := pm.KeyOffsets(i + 1)
   115  	key = buf[start:stop]
   116  	return
   117  }
   118  
   119  func TestNodeHashValueCompatibility(t *testing.T) {
   120  	keys, values := randomNodeItemPairs(t, (rand.Int()%101)+50)
   121  	nd := newLeafNode(keys, values)
   122  	nbf := types.Format_DOLT
   123  	th, err := ValueFromNode(nd).Hash(nbf)
   124  	require.NoError(t, err)
   125  	assert.Equal(t, nd.HashOf(), th)
   126  
   127  	h1 := hash.Parse("kvup5vdur99ush7c18g0kjc6rhdkfdgo")
   128  	h2 := hash.Parse("7e54ill10nji9oao1ja88buh9itaj7k9")
   129  	msg := message.NewAddressMapSerializer(sharedPool).Serialize(
   130  		[][]byte{[]byte("chopin"), []byte("listz")},
   131  		[][]byte{h1[:], h2[:]},
   132  		[]uint64{},
   133  		0)
   134  	nd, err = NodeFromBytes(msg)
   135  	require.NoError(t, err)
   136  	th, err = ValueFromNode(nd).Hash(nbf)
   137  	require.NoError(t, err)
   138  	assert.Equal(t, nd.HashOf(), th)
   139  }
   140  
   141  func TestNodeDecodeValueCompatibility(t *testing.T) {
   142  	keys, values := randomNodeItemPairs(t, (rand.Int()%101)+50)
   143  	nd := newLeafNode(keys, values)
   144  
   145  	ts := &chunks.TestStorage{}
   146  	cs := ts.NewView()
   147  	ns := NewNodeStore(cs)
   148  	vs := types.NewValueStore(cs)
   149  	h, err := ns.Write(context.Background(), nd)
   150  	require.NoError(t, err)
   151  
   152  	v, err := vs.ReadValue(context.Background(), h)
   153  	require.NoError(t, err)
   154  	assert.Equal(t, nd.bytes(), []byte(v.(types.SerialMessage)))
   155  }
   156  
   157  func randomNodeItemPairs(t *testing.T, count int) (keys, values []Item) {
   158  	keys = make([]Item, count)
   159  	for i := range keys {
   160  		sz := (rand.Int() % 41) + 10
   161  		keys[i] = make(Item, sz)
   162  		_, err := rand.Read(keys[i])
   163  		assert.NoError(t, err)
   164  	}
   165  
   166  	values = make([]Item, count)
   167  	copy(values, keys)
   168  	rand.Shuffle(len(values), func(i, j int) {
   169  		values[i], values[j] = values[j], values[i]
   170  	})
   171  
   172  	return
   173  }
   174  
   175  func sumSize(items []Item) (sz uint64) {
   176  	for _, item := range items {
   177  		sz += uint64(len(item))
   178  	}
   179  	return
   180  }
   181  
   182  func sumTupleSize(items []val.Tuple) (sz uint64) {
   183  	for _, item := range items {
   184  		sz += uint64(len(item))
   185  	}
   186  	return
   187  }
   188  
   189  func TestSamples(t *testing.T) {
   190  	tests := []struct {
   191  		data Samples
   192  		sum  float64
   193  		mean float64
   194  		std  float64
   195  	}{
   196  		{
   197  			data: Samples{1},
   198  			sum:  1.0,
   199  			mean: 1.0,
   200  			std:  0.0,
   201  		},
   202  		{
   203  			data: Samples{1, 2, 3, 4, 5},
   204  			sum:  15.0,
   205  			mean: 3.0,
   206  			std:  math.Sqrt(2),
   207  		},
   208  	}
   209  
   210  	for _, test := range tests {
   211  		assert.Equal(t, test.sum, test.data.sum())
   212  		assert.Equal(t, test.mean, test.data.mean())
   213  		assert.Equal(t, test.std, test.data.stdDev())
   214  	}
   215  }