github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/compaction_integration2_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"math/rand"
    18  	"testing"
    19  
    20  	"github.com/sirupsen/logrus"
    21  	"github.com/sirupsen/logrus/hooks/test"
    22  	"github.com/stretchr/testify/assert"
    23  	"github.com/stretchr/testify/require"
    24  	"github.com/weaviate/weaviate/entities/cyclemanager"
    25  )
    26  
    27  func TestCompactionReplaceStrategyStraggler(t *testing.T) {
    28  	opts := []BucketOption{WithStrategy(StrategyReplace)}
    29  	size := 200
    30  
    31  	type kv struct {
    32  		key    []byte
    33  		value  []byte
    34  		delete bool
    35  	}
    36  
    37  	var segment1 []kv
    38  	var segment2 []kv
    39  	var segment3 []kv
    40  	var expected []kv
    41  	var bucket *Bucket
    42  
    43  	dirName := t.TempDir()
    44  
    45  	t.Run("create test data", func(t *testing.T) {
    46  		// The test data is split into 4 scenarios evenly:
    47  		//
    48  		// 1.) created in the first segment, never touched again
    49  		// 2.) created in the first segment, updated in the second
    50  		// 3.) created in the first segment, deleted in the second
    51  		// 4.) not present in the first segment, created in the second
    52  		for i := 0; i < size; i++ {
    53  			key := []byte(fmt.Sprintf("key-%3d", i))
    54  			originalValue := []byte(fmt.Sprintf("value-%3d-original", i))
    55  
    56  			switch i % 4 {
    57  			case 0:
    58  				// add to segment 1
    59  				segment1 = append(segment1, kv{
    60  					key:   key,
    61  					value: originalValue,
    62  				})
    63  
    64  				// leave this element untouched in the second segment
    65  				expected = append(expected, kv{
    66  					key:   key,
    67  					value: originalValue,
    68  				})
    69  			case 1:
    70  				// add to segment 1
    71  				segment1 = append(segment1, kv{
    72  					key:   key,
    73  					value: originalValue,
    74  				})
    75  
    76  				// update in the second segment
    77  				updatedValue := []byte(fmt.Sprintf("value-%3d-updated", i))
    78  				segment2 = append(segment2, kv{
    79  					key:   key,
    80  					value: updatedValue,
    81  				})
    82  				// update in the third segment
    83  				updatedValue = []byte(fmt.Sprintf("value-%3d-updated-twice", i))
    84  				segment3 = append(segment3, kv{
    85  					key:   key,
    86  					value: updatedValue,
    87  				})
    88  
    89  				expected = append(expected, kv{
    90  					key:   key,
    91  					value: updatedValue,
    92  				})
    93  			case 2:
    94  				// add to segment 1
    95  				segment1 = append(segment1, kv{
    96  					key:   key,
    97  					value: originalValue,
    98  				})
    99  
   100  				// delete in the third segment
   101  				segment3 = append(segment3, kv{
   102  					key:    key,
   103  					delete: true,
   104  				})
   105  
   106  				// do not add to expected at all
   107  
   108  			case 3:
   109  				// do not add to segment 1
   110  
   111  				// only add to segment 3 (first entry)
   112  				segment3 = append(segment3, kv{
   113  					key:   key,
   114  					value: originalValue,
   115  				})
   116  
   117  				expected = append(expected, kv{
   118  					key:   key,
   119  					value: originalValue,
   120  				})
   121  			}
   122  		}
   123  	})
   124  
   125  	t.Run("shuffle the import order for each segment", func(t *testing.T) {
   126  		// this is to make sure we don't accidentally rely on the import order
   127  		rand.Shuffle(len(segment1), func(i, j int) {
   128  			segment1[i], segment1[j] = segment1[j], segment1[i]
   129  		})
   130  		rand.Shuffle(len(segment2), func(i, j int) {
   131  			segment2[i], segment2[j] = segment2[j], segment2[i]
   132  		})
   133  	})
   134  
   135  	t.Run("init bucket", func(t *testing.T) {
   136  		b, err := NewBucket(context.TODO(), dirName, "", nullLogger2(), nil,
   137  			cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), opts...)
   138  		require.Nil(t, err)
   139  
   140  		// so big it effectively never triggers as part of this test
   141  		b.SetMemtableThreshold(1e9)
   142  
   143  		bucket = b
   144  	})
   145  
   146  	t.Run("import segment 1", func(t *testing.T) {
   147  		for _, pair := range segment1 {
   148  			if !pair.delete {
   149  				err := bucket.Put(pair.key, pair.value)
   150  				require.Nil(t, err)
   151  			} else {
   152  				err := bucket.Delete(pair.key)
   153  				require.Nil(t, err)
   154  
   155  			}
   156  		}
   157  	})
   158  
   159  	t.Run("flush to disk", func(t *testing.T) {
   160  		require.Nil(t, bucket.FlushAndSwitch())
   161  	})
   162  
   163  	t.Run("import segment 2", func(t *testing.T) {
   164  		for _, pair := range segment2 {
   165  			if !pair.delete {
   166  				err := bucket.Put(pair.key, pair.value)
   167  				require.Nil(t, err)
   168  			} else {
   169  				err := bucket.Delete(pair.key)
   170  				require.Nil(t, err)
   171  
   172  			}
   173  		}
   174  	})
   175  
   176  	t.Run("flush to disk", func(t *testing.T) {
   177  		require.Nil(t, bucket.FlushAndSwitch())
   178  	})
   179  
   180  	t.Run("import segment 3", func(t *testing.T) {
   181  		for _, pair := range segment3 {
   182  			if !pair.delete {
   183  				err := bucket.Put(pair.key, pair.value)
   184  				require.Nil(t, err)
   185  			} else {
   186  				err := bucket.Delete(pair.key)
   187  				require.Nil(t, err)
   188  
   189  			}
   190  		}
   191  	})
   192  
   193  	t.Run("flush to disk", func(t *testing.T) {
   194  		require.Nil(t, bucket.FlushAndSwitch())
   195  	})
   196  
   197  	t.Run("verify control before compaction", func(t *testing.T) {
   198  		var retrieved []kv
   199  
   200  		c := bucket.Cursor()
   201  		defer c.Close()
   202  
   203  		for k, v := c.First(); k != nil; k, v = c.Next() {
   204  			keyCopy := copyByteSlice2(k)
   205  			valueCopy := copyByteSlice2(v)
   206  			retrieved = append(retrieved, kv{
   207  				key:   keyCopy,
   208  				value: valueCopy,
   209  			})
   210  		}
   211  
   212  		assert.Equal(t, expected, retrieved)
   213  	})
   214  
   215  	t.Run("verify count control before compaction", func(*testing.T) {
   216  		assert.Equal(t, len(expected), bucket.Count())
   217  	})
   218  
   219  	t.Run("compact until no longer eligible", func(t *testing.T) {
   220  		var compacted bool
   221  		var err error
   222  		for compacted, err = bucket.disk.compactOnce(); err == nil && compacted; compacted, err = bucket.disk.compactOnce() {
   223  		}
   224  		require.Nil(t, err)
   225  	})
   226  
   227  	t.Run("verify control after compaction", func(t *testing.T) {
   228  		var retrieved []kv
   229  
   230  		c := bucket.Cursor()
   231  		defer c.Close()
   232  
   233  		for k, v := c.First(); k != nil; k, v = c.Next() {
   234  			keyCopy := copyByteSlice2(k)
   235  			valueCopy := copyByteSlice2(v)
   236  			retrieved = append(retrieved, kv{
   237  				key:   keyCopy,
   238  				value: valueCopy,
   239  			})
   240  		}
   241  
   242  		assert.Equal(t, expected, retrieved)
   243  	})
   244  
   245  	t.Run("verify control using individual get operations",
   246  		func(t *testing.T) {
   247  			for _, pair := range expected {
   248  				retrieved, err := bucket.Get(pair.key)
   249  				require.NoError(t, err)
   250  
   251  				assert.Equal(t, pair.value, retrieved)
   252  			}
   253  		})
   254  
   255  	t.Run("verify count after compaction", func(*testing.T) {
   256  		assert.Equal(t, len(expected), bucket.Count())
   257  	})
   258  }
   259  
   260  func nullLogger2() logrus.FieldLogger {
   261  	log, _ := test.NewNullLogger()
   262  	return log
   263  }
   264  
   265  func copyByteSlice2(src []byte) []byte {
   266  	dst := make([]byte, len(src))
   267  	copy(dst, src)
   268  	return dst
   269  }