github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/concurrent_writing_integration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTest
    13  // +build integrationTest
    14  
    15  package lsmkv
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"crypto/rand"
    21  	"fmt"
    22  	"reflect"
    23  	"sync"
    24  	"testing"
    25  	"time"
    26  
    27  	"github.com/google/uuid"
    28  	"github.com/stretchr/testify/assert"
    29  	"github.com/stretchr/testify/require"
    30  	"github.com/weaviate/weaviate/entities/cyclemanager"
    31  )
    32  
    33  // This test continuously writes into a bucket with a small memtable threshold,
    34  // so that a lot of flushing is happening while writing. This is to ensure that
    35  // there will be no lost writes or other inconsistencies under load
    36  func TestConcurrentWriting_Replace(t *testing.T) {
    37  	dirName := t.TempDir()
    38  
    39  	amount := 2000
    40  	sizePerValue := 128
    41  
    42  	keys := make([][]byte, amount)
    43  	values := make([][]byte, amount)
    44  
    45  	bucket, err := NewBucket(testCtx(), dirName, "", nullLogger(), nil,
    46  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
    47  		WithStrategy(StrategyReplace),
    48  		WithMemtableThreshold(10000))
    49  	require.Nil(t, err)
    50  
    51  	t.Run("generate random data", func(t *testing.T) {
    52  		for i := range keys {
    53  			uuid, err := uuid.New().MarshalBinary()
    54  			require.Nil(t, err)
    55  			keys[i] = uuid
    56  
    57  			values[i] = make([]byte, sizePerValue)
    58  			rand.Read(values[i])
    59  		}
    60  	})
    61  
    62  	t.Run("import", func(t *testing.T) {
    63  		wg := sync.WaitGroup{}
    64  
    65  		for i := range keys {
    66  			time.Sleep(50 * time.Microsecond)
    67  			wg.Add(1)
    68  			go func(index int) {
    69  				defer wg.Done()
    70  				err := bucket.Put(keys[index], values[index])
    71  				assert.Nil(t, err)
    72  			}(i)
    73  		}
    74  		wg.Wait()
    75  	})
    76  
    77  	t.Run("verify get", func(t *testing.T) {
    78  		correct := 0
    79  		var missingKeys []int
    80  
    81  		for i := range keys {
    82  			value, err := bucket.Get(keys[i])
    83  			assert.Nil(t, err)
    84  			if bytes.Equal(values[i], value) {
    85  				correct++
    86  			} else {
    87  				missingKeys = append(missingKeys, i)
    88  			}
    89  		}
    90  
    91  		if len(missingKeys) > 0 {
    92  			fmt.Printf("missing keys: %v\n", missingKeys)
    93  		}
    94  		assert.Equal(t, amount, correct)
    95  	})
    96  
    97  	t.Run("verify cursor", func(t *testing.T) {
    98  		correct := 0
    99  		// put all key value/pairs in a map so we can access them by key
   100  		targets := map[string][]byte{}
   101  
   102  		for i := range keys {
   103  			targets[string(keys[i])] = values[i]
   104  		}
   105  
   106  		c := bucket.Cursor()
   107  		defer c.Close()
   108  		for k, v := c.First(); k != nil; k, v = c.Next() {
   109  			control := targets[string(k)]
   110  			if bytes.Equal(control, v) {
   111  				correct++
   112  			}
   113  		}
   114  
   115  		assert.Equal(t, amount, correct)
   116  	})
   117  
   118  	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
   119  	defer cancel()
   120  
   121  	require.Nil(t, bucket.Shutdown(ctx))
   122  }
   123  
   124  // This test continuously writes into a bucket with a small memtable threshold,
   125  // so that a lot of flushing is happening while writing. This is to ensure that
   126  // there will be no lost writes or other inconsistencies under load
   127  func TestConcurrentWriting_Set(t *testing.T) {
   128  	dirName := t.TempDir()
   129  
   130  	amount := 2000
   131  	valuesPerKey := 4
   132  	sizePerValue := 32
   133  
   134  	keys := make([][]byte, amount)
   135  	values := make([][][]byte, amount)
   136  
   137  	bucket, err := NewBucket(testCtx(), dirName, "", nullLogger(), nil,
   138  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   139  		WithStrategy(StrategySetCollection),
   140  		WithMemtableThreshold(10000))
   141  	require.Nil(t, err)
   142  
   143  	t.Run("generate random data", func(t *testing.T) {
   144  		for i := range keys {
   145  			uuid, err := uuid.New().MarshalBinary()
   146  			require.Nil(t, err)
   147  			keys[i] = uuid
   148  
   149  			values[i] = make([][]byte, valuesPerKey)
   150  			for j := range values[i] {
   151  				values[i][j] = make([]byte, sizePerValue)
   152  				rand.Read(values[i][j])
   153  			}
   154  		}
   155  	})
   156  
   157  	t.Run("import", func(t *testing.T) {
   158  		wg := sync.WaitGroup{}
   159  
   160  		for i := range keys {
   161  			time.Sleep(50 * time.Microsecond)
   162  			wg.Add(1)
   163  			go func(index int) {
   164  				defer wg.Done()
   165  				err := bucket.SetAdd(keys[index], values[index])
   166  				assert.Nil(t, err)
   167  			}(i)
   168  		}
   169  		wg.Wait()
   170  	})
   171  
   172  	t.Run("verify get", func(t *testing.T) {
   173  		correct := 0
   174  
   175  		for i := range keys {
   176  			value, err := bucket.SetList(keys[i])
   177  			assert.Nil(t, err)
   178  			if reflect.DeepEqual(values[i], value) {
   179  				correct++
   180  			}
   181  		}
   182  
   183  		assert.Equal(t, amount, correct)
   184  	})
   185  
   186  	t.Run("verify cursor", func(t *testing.T) {
   187  		correct := 0
   188  		// put all key value/pairs in a map so we can access them by key
   189  		targets := map[string][][]byte{}
   190  
   191  		for i := range keys {
   192  			targets[string(keys[i])] = values[i]
   193  		}
   194  
   195  		c := bucket.SetCursor()
   196  		defer c.Close()
   197  		for k, v := c.First(); k != nil; k, v = c.Next() {
   198  			control := targets[string(k)]
   199  			if reflect.DeepEqual(control, v) {
   200  				correct++
   201  			}
   202  		}
   203  
   204  		assert.Equal(t, amount, correct)
   205  	})
   206  
   207  	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
   208  	defer cancel()
   209  
   210  	require.Nil(t, bucket.Shutdown(ctx))
   211  }
   212  
   213  // This test continuously writes into a bucket with a small memtable threshold,
   214  // so that a lot of flushing is happening while writing. This is to ensure that
   215  // there will be no lost writes or other inconsistencies under load
   216  func TestConcurrentWriting_Map(t *testing.T) {
   217  	dirName := t.TempDir()
   218  
   219  	amount := 2000
   220  	valuesPerKey := 4
   221  	sizePerKey := 8
   222  	sizePerValue := 32
   223  
   224  	keys := make([][]byte, amount)
   225  	values := make([][]MapPair, amount)
   226  
   227  	bucket, err := NewBucket(testCtx(), dirName, "", nullLogger(), nil,
   228  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   229  		WithStrategy(StrategyMapCollection),
   230  		WithMemtableThreshold(5000))
   231  	require.Nil(t, err)
   232  
   233  	t.Run("generate random data", func(t *testing.T) {
   234  		for i := range keys {
   235  			uuid, err := uuid.New().MarshalBinary()
   236  			require.Nil(t, err)
   237  			keys[i] = uuid
   238  
   239  			values[i] = make([]MapPair, valuesPerKey)
   240  			for j := range values[i] {
   241  				values[i][j] = MapPair{
   242  					Key:   make([]byte, sizePerKey),
   243  					Value: make([]byte, sizePerValue),
   244  				}
   245  				rand.Read(values[i][j].Key)
   246  				rand.Read(values[i][j].Value)
   247  			}
   248  		}
   249  	})
   250  
   251  	t.Run("import", func(t *testing.T) {
   252  		wg := sync.WaitGroup{}
   253  
   254  		for i := range keys {
   255  			for j := 0; j < valuesPerKey; j++ {
   256  				time.Sleep(50 * time.Microsecond)
   257  				wg.Add(1)
   258  				go func(rowIndex, valueIndex int) {
   259  					defer wg.Done()
   260  					err := bucket.MapSet(keys[rowIndex], values[rowIndex][valueIndex])
   261  					assert.Nil(t, err)
   262  				}(i, j)
   263  			}
   264  		}
   265  		wg.Wait()
   266  	})
   267  
   268  	t.Run("verify cursor", func(t *testing.T) {
   269  		correct := 0
   270  		// put all key value/pairs in a map so we can access them by key
   271  		targets := map[string][]MapPair{}
   272  
   273  		for i := range keys {
   274  			targets[string(keys[i])] = values[i]
   275  		}
   276  
   277  		c := bucket.MapCursor()
   278  		defer c.Close()
   279  		for k, v := c.First(); k != nil; k, v = c.Next() {
   280  			control := targets[string(k)]
   281  			if mapElementsMatch(control, v) {
   282  				correct++
   283  			}
   284  		}
   285  
   286  		assert.Equal(t, amount, correct)
   287  	})
   288  
   289  	t.Run("verify get", func(t *testing.T) {
   290  		correct := 0
   291  
   292  		for i := range keys {
   293  			value, err := bucket.MapList(keys[i])
   294  			assert.Nil(t, err)
   295  			if mapElementsMatch(values[i], value) {
   296  				correct++
   297  			}
   298  		}
   299  
   300  		assert.Equal(t, amount, correct)
   301  	})
   302  
   303  	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
   304  	defer cancel()
   305  
   306  	require.Nil(t, bucket.Shutdown(ctx))
   307  }
   308  
   309  func mapElementsMatch(a, b []MapPair) bool {
   310  	if len(a) != len(b) {
   311  		return false
   312  	}
   313  
   314  	aMap := map[string][]byte{}
   315  
   316  	for _, kv := range a {
   317  		aMap[string(kv.Key)] = kv.Value
   318  	}
   319  
   320  	for _, kv := range b {
   321  		control := aMap[string(kv.Key)]
   322  		if !bytes.Equal(kv.Value, control) {
   323  			return false
   324  		}
   325  	}
   326  
   327  	return true
   328  }