github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segment_net_count_additions_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"context"
    16  	"encoding/binary"
    17  	"io"
    18  	"os"
    19  	"path"
    20  	"strings"
    21  	"testing"
    22  
    23  	"github.com/sirupsen/logrus/hooks/test"
    24  	"github.com/stretchr/testify/assert"
    25  	"github.com/stretchr/testify/require"
    26  	"github.com/weaviate/weaviate/entities/cyclemanager"
    27  )
    28  
    29  func TestCNA(t *testing.T) {
    30  	ctx := context.Background()
    31  	tests := bucketTests{
    32  		{
    33  			name: "createCNAOnFlush",
    34  			f:    createCNAOnFlush,
    35  			opts: []BucketOption{
    36  				WithStrategy(StrategyReplace),
    37  			},
    38  		},
    39  		{
    40  			name: "createCNAInit",
    41  			f:    createCNAInit,
    42  			opts: []BucketOption{
    43  				WithStrategy(StrategyReplace),
    44  			},
    45  		},
    46  		{
    47  			name: "repairCorruptedCNAOnInit",
    48  			f:    repairCorruptedCNAOnInit,
    49  			opts: []BucketOption{
    50  				WithStrategy(StrategyReplace),
    51  			},
    52  		},
    53  	}
    54  	tests.run(ctx, t)
    55  }
    56  
    57  func createCNAOnFlush(ctx context.Context, t *testing.T, opts []BucketOption) {
    58  	dirName := t.TempDir()
    59  
    60  	logger, _ := test.NewNullLogger()
    61  
    62  	b, err := NewBucket(ctx, dirName, "", logger, nil,
    63  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), opts...)
    64  	require.Nil(t, err)
    65  	defer b.Shutdown(ctx)
    66  
    67  	require.Nil(t, b.Put([]byte("hello"), []byte("world")))
    68  	require.Nil(t, b.FlushMemtable())
    69  
    70  	files, err := os.ReadDir(dirName)
    71  	require.Nil(t, err)
    72  
    73  	_, ok := findFileWithExt(files, ".cna")
    74  	assert.True(t, ok)
    75  }
    76  
    77  func createCNAInit(ctx context.Context, t *testing.T, opts []BucketOption) {
    78  	// this test deletes the initial cna and makes sure it gets recreated after
    79  	// the bucket is initialized
    80  	dirName := t.TempDir()
    81  
    82  	logger, _ := test.NewNullLogger()
    83  
    84  	b, err := NewBucket(ctx, dirName, "", logger, nil,
    85  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), opts...)
    86  	require.Nil(t, err)
    87  	defer b.Shutdown(ctx)
    88  
    89  	require.Nil(t, b.Put([]byte("hello"), []byte("world")))
    90  	require.Nil(t, b.FlushMemtable())
    91  
    92  	files, err := os.ReadDir(dirName)
    93  	require.Nil(t, err)
    94  	fname, ok := findFileWithExt(files, ".cna")
    95  	require.True(t, ok)
    96  
    97  	err = os.RemoveAll(path.Join(dirName, fname))
    98  	require.Nil(t, err)
    99  
   100  	files, err = os.ReadDir(dirName)
   101  	require.Nil(t, err)
   102  	_, ok = findFileWithExt(files, ".cna")
   103  	require.False(t, ok, "verify the file is really gone")
   104  
   105  	// on Windows we have to shutdown the bucket before opening it again
   106  	require.Nil(t, b.Shutdown(ctx))
   107  
   108  	// now create a new bucket and assert that the file is re-created on init
   109  	b2, err := NewBucket(ctx, dirName, "", logger, nil,
   110  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), opts...)
   111  	require.Nil(t, err)
   112  	defer b2.Shutdown(ctx)
   113  
   114  	files, err = os.ReadDir(dirName)
   115  	require.Nil(t, err)
   116  	_, ok = findFileWithExt(files, ".cna")
   117  	require.True(t, ok)
   118  }
   119  
   120  func repairCorruptedCNAOnInit(ctx context.Context, t *testing.T, opts []BucketOption) {
   121  	// this test deletes the initial cna and makes sure it gets recreated after
   122  	// the bucket is initialized
   123  	dirName := t.TempDir()
   124  
   125  	logger, _ := test.NewNullLogger()
   126  
   127  	b, err := NewBucket(ctx, dirName, "", logger, nil,
   128  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), opts...)
   129  	require.Nil(t, err)
   130  	defer b.Shutdown(ctx)
   131  
   132  	require.Nil(t, b.Put([]byte("hello"), []byte("world")))
   133  	require.Nil(t, b.FlushMemtable())
   134  
   135  	files, err := os.ReadDir(dirName)
   136  	require.Nil(t, err)
   137  	fname, ok := findFileWithExt(files, ".cna")
   138  	require.True(t, ok)
   139  
   140  	// now corrupt the file by replacing the count value without adapting the checksum
   141  	require.Nil(t, corruptCNAFile(path.Join(dirName, fname), 12345))
   142  
   143  	// on Windows we have to shutdown the bucket before opening it again
   144  	require.Nil(t, b.Shutdown(ctx))
   145  	// now create a new bucket and assert that the file is ignored, re-created on
   146  	// init, and the count matches
   147  	b2, err := NewBucket(ctx, dirName, "", logger, nil,
   148  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), opts...)
   149  	require.Nil(t, err)
   150  	defer b2.Shutdown(ctx)
   151  
   152  	assert.Equal(t, 1, b2.Count())
   153  }
   154  
   155  func TestCNA_OFF(t *testing.T) {
   156  	ctx := context.Background()
   157  	tests := bucketTests{
   158  		{
   159  			name: "dontCreateCNA",
   160  			f:    dontCreateCNA,
   161  			opts: []BucketOption{
   162  				WithStrategy(StrategyReplace),
   163  				WithCalcCountNetAdditions(false),
   164  			},
   165  		},
   166  		{
   167  			name: "dontRecreateCNA",
   168  			f:    dontRecreateCNA,
   169  			opts: []BucketOption{
   170  				WithStrategy(StrategyReplace),
   171  				WithCalcCountNetAdditions(false),
   172  			},
   173  		},
   174  		{
   175  			name: "dontPrecomputeCNA",
   176  			f:    dontPrecomputeCNA,
   177  			opts: []BucketOption{
   178  				WithStrategy(StrategyReplace),
   179  				WithCalcCountNetAdditions(false),
   180  			},
   181  		},
   182  	}
   183  	tests.run(ctx, t)
   184  }
   185  
   186  func dontCreateCNA(ctx context.Context, t *testing.T, opts []BucketOption) {
   187  	dirName := t.TempDir()
   188  	logger, _ := test.NewNullLogger()
   189  
   190  	b, err := NewBucket(ctx, dirName, "", logger, nil,
   191  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   192  		opts...)
   193  	require.NoError(t, err)
   194  	defer b.Shutdown(ctx)
   195  
   196  	t.Run("populate", func(t *testing.T) {
   197  		require.NoError(t, b.Put([]byte("hello"), []byte("world")))
   198  		require.NoError(t, b.FlushMemtable())
   199  	})
   200  
   201  	t.Run("check files", func(t *testing.T) {
   202  		files, err := os.ReadDir(dirName)
   203  		require.NoError(t, err)
   204  
   205  		_, ok := findFileWithExt(files, ".cna")
   206  		assert.False(t, ok)
   207  	})
   208  
   209  	t.Run("count", func(t *testing.T) {
   210  		assert.Equal(t, 0, b.Count())
   211  	})
   212  }
   213  
   214  func dontRecreateCNA(ctx context.Context, t *testing.T, opts []BucketOption) {
   215  	dirName := t.TempDir()
   216  	logger, _ := test.NewNullLogger()
   217  
   218  	t.Run("create, populate, shutdown", func(t *testing.T) {
   219  		b, err := NewBucket(ctx, dirName, "", logger, nil,
   220  			cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   221  			opts...)
   222  		require.NoError(t, err)
   223  		defer b.Shutdown(ctx)
   224  
   225  		require.NoError(t, b.Put([]byte("hello"), []byte("world")))
   226  		require.NoError(t, b.FlushMemtable())
   227  	})
   228  
   229  	b2, err := NewBucket(ctx, dirName, "", logger, nil,
   230  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   231  		opts...)
   232  	require.NoError(t, err)
   233  	defer b2.Shutdown(ctx)
   234  
   235  	t.Run("check files", func(t *testing.T) {
   236  		files, err := os.ReadDir(dirName)
   237  		require.NoError(t, err)
   238  
   239  		_, ok := findFileWithExt(files, ".cna")
   240  		assert.False(t, ok)
   241  	})
   242  
   243  	t.Run("count", func(t *testing.T) {
   244  		assert.Equal(t, 0, b2.Count())
   245  	})
   246  }
   247  
   248  func dontPrecomputeCNA(ctx context.Context, t *testing.T, opts []BucketOption) {
   249  	dirName := t.TempDir()
   250  	logger, _ := test.NewNullLogger()
   251  
   252  	b, err := NewBucket(ctx, dirName, "", logger, nil,
   253  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   254  		opts...)
   255  	require.NoError(t, err)
   256  	defer b.Shutdown(ctx)
   257  
   258  	t.Run("populate, compact", func(t *testing.T) {
   259  		require.NoError(t, b.Put([]byte("hello"), []byte("world")))
   260  		require.NoError(t, b.FlushMemtable())
   261  
   262  		require.NoError(t, b.Put([]byte("hello2"), []byte("world2")))
   263  		require.NoError(t, b.FlushMemtable())
   264  
   265  		compacted, err := b.disk.compactOnce()
   266  		require.NoError(t, err)
   267  		require.True(t, compacted)
   268  	})
   269  
   270  	t.Run("check files", func(t *testing.T) {
   271  		files, err := os.ReadDir(dirName)
   272  		require.NoError(t, err)
   273  
   274  		_, ok := findFileWithExt(files, ".cna")
   275  		assert.False(t, ok)
   276  	})
   277  
   278  	t.Run("count", func(t *testing.T) {
   279  		assert.Equal(t, 0, b.Count())
   280  	})
   281  }
   282  
   283  func findFileWithExt(files []os.DirEntry, ext string) (string, bool) {
   284  	for _, file := range files {
   285  		fname := file.Name()
   286  		if strings.HasSuffix(fname, ext) {
   287  			return fname, true
   288  		}
   289  
   290  	}
   291  	return "", false
   292  }
   293  
   294  func corruptCNAFile(fname string, corruptValue uint64) error {
   295  	f, err := os.Open(fname)
   296  	if err != nil {
   297  		return err
   298  	}
   299  
   300  	data, err := io.ReadAll(f)
   301  	if err != nil {
   302  		return err
   303  	}
   304  
   305  	if err := f.Close(); err != nil {
   306  		return err
   307  	}
   308  
   309  	binary.LittleEndian.PutUint64(data[4:12], corruptValue)
   310  
   311  	f, err = os.Create(fname)
   312  	if err != nil {
   313  		return err
   314  	}
   315  
   316  	_, err = f.Write(data)
   317  	if err != nil {
   318  		return err
   319  	}
   320  
   321  	return f.Close()
   322  }