github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/blob_builder_test.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tree
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"math"
    23  	"testing"
    24  
    25  	"github.com/stretchr/testify/assert"
    26  	"github.com/stretchr/testify/require"
    27  
    28  	"github.com/dolthub/dolt/go/gen/fb/serial"
    29  	"github.com/dolthub/dolt/go/store/hash"
    30  	"github.com/dolthub/dolt/go/store/prolly/message"
    31  	"github.com/dolthub/dolt/go/store/val"
    32  )
    33  
    34  func TestWriteImmutableTree(t *testing.T) {
    35  	tests := []struct {
    36  		inputSize int
    37  		chunkSize int
    38  		execErr   error
    39  		initErr   error
    40  		checkSum  bool
    41  	}{
    42  		{
    43  			inputSize: 100,
    44  			chunkSize: 40,
    45  		},
    46  		{
    47  			inputSize: 100,
    48  			chunkSize: 100,
    49  		},
    50  		{
    51  			inputSize: 100,
    52  			chunkSize: 100,
    53  		},
    54  		{
    55  			inputSize: 255,
    56  			chunkSize: 40,
    57  		},
    58  		{
    59  			inputSize: 243,
    60  			chunkSize: 40,
    61  		},
    62  		{
    63  			inputSize: 47,
    64  			chunkSize: 40,
    65  		},
    66  		{
    67  			inputSize: 200,
    68  			chunkSize: 40,
    69  		},
    70  		{
    71  			inputSize: 200,
    72  			chunkSize: 40,
    73  		},
    74  		{
    75  			inputSize: 1,
    76  			chunkSize: 40,
    77  		},
    78  		{
    79  			inputSize: 20,
    80  			chunkSize: 500,
    81  		},
    82  		{
    83  			inputSize: 1_000,
    84  			chunkSize: 40,
    85  			checkSum:  false,
    86  		},
    87  		{
    88  			inputSize: 1_000,
    89  			chunkSize: 60,
    90  			checkSum:  false,
    91  		},
    92  		{
    93  			inputSize: 1_000,
    94  			chunkSize: 80,
    95  			checkSum:  false,
    96  		},
    97  		{
    98  			inputSize: 10_000,
    99  			chunkSize: 100,
   100  			checkSum:  false,
   101  		},
   102  		{
   103  			inputSize: 50_000_000,
   104  			chunkSize: 4000,
   105  			checkSum:  false,
   106  		},
   107  		{
   108  			inputSize: 50_000_000,
   109  			chunkSize: 32_000,
   110  			checkSum:  false,
   111  		},
   112  		{
   113  			inputSize: 0,
   114  			chunkSize: 40,
   115  		},
   116  		{
   117  			inputSize: 100,
   118  			chunkSize: 41,
   119  			initErr:   ErrInvalidChunkSize,
   120  		},
   121  	}
   122  
   123  	for _, tt := range tests {
   124  		t.Run(fmt.Sprintf("inputSize=%d; chunkSize=%d", tt.inputSize, tt.chunkSize), func(t *testing.T) {
   125  			buf := make([]byte, tt.inputSize)
   126  			for i := range buf {
   127  				buf[i] = byte(i)
   128  			}
   129  			ctx := context.Background()
   130  			r := bytes.NewReader(buf)
   131  			ns := NewTestNodeStore()
   132  			//serializer := message.NewBlobSerializer(ns.Pool())
   133  
   134  			b, err := NewBlobBuilder(tt.chunkSize)
   135  			if tt.initErr != nil {
   136  				require.True(t, errors.Is(err, tt.initErr))
   137  				return
   138  			}
   139  			b.SetNodeStore(ns)
   140  			b.Init(tt.inputSize)
   141  			root, _, err := b.Chunk(ctx, r)
   142  
   143  			if tt.execErr != nil {
   144  				require.True(t, errors.Is(err, tt.execErr))
   145  				return
   146  			}
   147  			require.NoError(t, err)
   148  
   149  			expSubtrees := expectedSubtrees(tt.inputSize, tt.chunkSize)
   150  			expLevel := expectedLevel(tt.inputSize, tt.chunkSize)
   151  			expSum := expectedSum(tt.inputSize)
   152  			expUnfilled := expectedUnfilled(tt.inputSize, tt.chunkSize)
   153  
   154  			intChunkSize := int(math.Ceil(float64(tt.chunkSize) / float64(hash.ByteLen)))
   155  
   156  			unfilledCnt := 0
   157  			sum := 0
   158  			byteCnt := 0
   159  			WalkNodes(ctx, root, ns, func(ctx context.Context, n Node) error {
   160  				if n.empty() {
   161  					return nil
   162  				}
   163  				var keyCnt int
   164  				leaf := n.IsLeaf()
   165  				if leaf {
   166  					byteCnt += len(getBlobValues(n.msg))
   167  					for _, i := range n.GetValue(0) {
   168  						sum += int(i)
   169  					}
   170  					keyCnt = len(getBlobValues(n.msg))
   171  					if keyCnt != tt.chunkSize {
   172  						unfilledCnt += 1
   173  					}
   174  				} else {
   175  					keyCnt = n.Count()
   176  					if keyCnt < intChunkSize {
   177  						unfilledCnt += 1
   178  					}
   179  				}
   180  				return nil
   181  			})
   182  
   183  			level := root.Level()
   184  			assert.Equal(t, expLevel, level)
   185  			if tt.checkSum {
   186  				assert.Equal(t, expSum, sum)
   187  			}
   188  			assert.Equal(t, tt.inputSize, byteCnt)
   189  			assert.Equal(t, expUnfilled, unfilledCnt)
   190  			if expLevel > 0 {
   191  				root, err = root.loadSubtrees()
   192  				require.NoError(t, err)
   193  				for i := range expSubtrees {
   194  					sc, err := root.getSubtreeCount(i)
   195  					require.NoError(t, err)
   196  					assert.Equal(t, expSubtrees[i], sc)
   197  				}
   198  			}
   199  		})
   200  	}
   201  }
   202  
   203  func expectedLevel(size, chunk int) int {
   204  	if size <= chunk {
   205  		return 0
   206  	}
   207  	size = int(math.Ceil(float64(size) / float64(chunk)))
   208  	l := 1
   209  	intChunk := int(math.Ceil(float64(chunk) / float64(hash.ByteLen)))
   210  	for size > intChunk {
   211  		size = int(math.Ceil(float64(size) / float64(intChunk)))
   212  		l += 1
   213  	}
   214  	return l
   215  }
   216  
   217  func expectedSubtrees(size, chunk int) subtreeCounts {
   218  	if size <= chunk {
   219  		return subtreeCounts{0}
   220  	}
   221  	l := expectedLevel(size, chunk)
   222  
   223  	size = int(math.Ceil(float64(size) / float64(chunk)))
   224  	intChunk := int(math.Ceil(float64(chunk) / float64(hash.ByteLen)))
   225  
   226  	filledSubtree := int(math.Pow(float64(intChunk), float64(l-1)))
   227  
   228  	subtrees := make(subtreeCounts, 0)
   229  	for size > filledSubtree {
   230  		subtrees = append(subtrees, uint64(filledSubtree))
   231  		size -= filledSubtree
   232  	}
   233  	if size > 0 {
   234  		subtrees = append(subtrees, uint64(size))
   235  	}
   236  	if len(subtrees) > intChunk {
   237  		panic("unreachable")
   238  	}
   239  	return subtrees
   240  }
   241  
   242  func expectedSum(size int) int {
   243  	return (size * (size + 1) / 2) - size
   244  }
   245  
   246  func expectedUnfilled(size, chunk int) int {
   247  	if size == chunk || size == 0 {
   248  		return 0
   249  	} else if size < chunk {
   250  		return 1
   251  	}
   252  
   253  	var unfilled int
   254  	// level 0 is special case
   255  	if size%chunk != 0 {
   256  		unfilled += 1
   257  	}
   258  	size = int(math.Ceil(float64(size) / float64(chunk)))
   259  
   260  	intChunk := int(math.Ceil(float64(chunk) / float64(hash.ByteLen)))
   261  	for size > intChunk {
   262  		if size%intChunk != 0 {
   263  			unfilled += 1
   264  		}
   265  		size = int(math.Ceil(float64(size) / float64(intChunk)))
   266  	}
   267  	if size < intChunk {
   268  		unfilled += 1
   269  	}
   270  	return unfilled
   271  }
   272  
   273  func TestImmutableTreeWalk(t *testing.T) {
   274  	tests := []struct {
   275  		blobLen   int
   276  		chunkSize int
   277  		keyCnt    int
   278  	}{
   279  		{
   280  			blobLen:   250,
   281  			chunkSize: 60,
   282  			keyCnt:    4,
   283  		},
   284  		{
   285  			blobLen:   250,
   286  			chunkSize: 40,
   287  			keyCnt:    4,
   288  		},
   289  		{
   290  			blobLen:   378,
   291  			chunkSize: 60,
   292  			keyCnt:    12,
   293  		},
   294  		{
   295  			blobLen:   5000,
   296  			chunkSize: 40,
   297  			keyCnt:    6,
   298  		},
   299  		{
   300  			blobLen:   1,
   301  			chunkSize: 40,
   302  			keyCnt:    6,
   303  		},
   304  		{
   305  			blobLen:   50_000_000,
   306  			chunkSize: 4000,
   307  			keyCnt:    1,
   308  		},
   309  		{
   310  			blobLen:   10_000,
   311  			chunkSize: 80,
   312  			keyCnt:    6,
   313  		},
   314  	}
   315  
   316  	ns := NewTestNodeStore()
   317  	for _, tt := range tests {
   318  		t.Run(fmt.Sprintf("inputSize=%d; chunkSize=%d; keyCnt=%d", tt.blobLen, tt.chunkSize, tt.keyCnt), func(t *testing.T) {
   319  			r := newTree(t, ns, tt.keyCnt, tt.blobLen, tt.chunkSize)
   320  			var cnt int
   321  			walkOpaqueNodes(context.Background(), r, ns, func(ctx context.Context, n Node) error {
   322  				cnt++
   323  				return nil
   324  			})
   325  			require.Equal(t, blobAddrCnt(tt.blobLen, tt.chunkSize)*tt.keyCnt+1, cnt)
   326  		})
   327  	}
   328  }
   329  
   330  func blobAddrCnt(size, chunk int) int {
   331  	if size == 0 {
   332  		return 0
   333  	}
   334  	if size <= chunk {
   335  		return 1
   336  	}
   337  	size = int(math.Ceil(float64(size) / float64(chunk)))
   338  	l := 1
   339  	sum := size
   340  	intChunk := int(math.Ceil(float64(chunk) / float64(hash.ByteLen)))
   341  	for size > intChunk {
   342  		size = int(math.Ceil(float64(size) / float64(intChunk)))
   343  		sum += size
   344  		l += 1
   345  	}
   346  	return sum + 1
   347  }
   348  
   349  func newTree(t *testing.T, ns NodeStore, keyCnt, blobLen, chunkSize int) Node {
   350  	ctx := context.Background()
   351  
   352  	keyDesc := val.NewTupleDescriptor(val.Type{Enc: val.Uint32Enc})
   353  	valDesc := val.NewTupleDescriptor(val.Type{Enc: val.BytesAddrEnc})
   354  
   355  	tuples := make([][2]val.Tuple, keyCnt)
   356  	keyBld := val.NewTupleBuilder(keyDesc)
   357  	valBld := val.NewTupleBuilder(valDesc)
   358  	for i := range tuples {
   359  		keyBld.PutUint32(0, uint32(i))
   360  		tuples[i][0] = keyBld.Build(sharedPool)
   361  
   362  		addr := mustNewBlob(ctx, ns, blobLen, chunkSize)
   363  		valBld.PutBytesAddr(0, addr)
   364  		tuples[i][1] = valBld.Build(sharedPool)
   365  	}
   366  
   367  	s := message.NewProllyMapSerializer(valDesc, ns.Pool())
   368  	chunker, err := newEmptyChunker(ctx, ns, s)
   369  	require.NoError(t, err)
   370  	for _, pair := range tuples {
   371  		err := chunker.AddPair(ctx, Item(pair[0]), Item(pair[1]))
   372  		require.NoError(t, err)
   373  	}
   374  	root, err := chunker.Done(ctx)
   375  	require.NoError(t, err)
   376  	return root
   377  }
   378  
   379  func mustNewBlob(ctx context.Context, ns NodeStore, len, chunkSize int) hash.Hash {
   380  	buf := make([]byte, len)
   381  	for i := range buf {
   382  		buf[i] = byte(i)
   383  	}
   384  	r := bytes.NewReader(buf)
   385  	b, err := NewBlobBuilder(chunkSize)
   386  	if err != nil {
   387  		panic(err)
   388  	}
   389  	b.SetNodeStore(ns)
   390  	b.Init(len)
   391  	_, addr, err := b.Chunk(ctx, r)
   392  	if err != nil {
   393  		panic(err)
   394  	}
   395  	return addr
   396  }
   397  
   398  func getBlobValues(msg serial.Message) []byte {
   399  	var b serial.Blob
   400  	err := serial.InitBlobRoot(&b, msg, serial.MessagePrefixSz)
   401  	if err != nil {
   402  		panic(err)
   403  	}
   404  	return b.PayloadBytes()
   405  }