github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segment_precompute_for_compation_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"os"
    18  	"path"
    19  	"strings"
    20  	"testing"
    21  
    22  	"github.com/sirupsen/logrus/hooks/test"
    23  	"github.com/stretchr/testify/assert"
    24  	"github.com/stretchr/testify/require"
    25  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex"
    26  	"github.com/weaviate/weaviate/entities/cyclemanager"
    27  )
    28  
    29  func TestPrecomputeForCompaction(t *testing.T) {
    30  	ctx := context.Background()
    31  	tests := bucketTests{
    32  		{
    33  			name: "precomputeSegmentMeta_Replace",
    34  			f:    precomputeSegmentMeta_Replace,
    35  			opts: []BucketOption{
    36  				WithStrategy(StrategyReplace),
    37  				WithSecondaryIndices(1),
    38  			},
    39  		},
    40  		{
    41  			name: "precomputeSegmentMeta_Set",
    42  			f:    precomputeSegmentMeta_Set,
    43  			opts: []BucketOption{
    44  				WithStrategy(StrategySetCollection),
    45  			},
    46  		},
    47  	}
    48  	tests.run(ctx, t)
    49  }
    50  
    51  func precomputeSegmentMeta_Replace(ctx context.Context, t *testing.T, opts []BucketOption) {
    52  	// first build a complete reference segment of which we can then strip its
    53  	// meta
    54  	dirName := t.TempDir()
    55  
    56  	logger, _ := test.NewNullLogger()
    57  
    58  	b, err := NewBucket(ctx, dirName, "", logger, nil,
    59  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), opts...)
    60  	require.Nil(t, err)
    61  	defer b.Shutdown(ctx)
    62  
    63  	require.Nil(t, b.Put([]byte("hello"), []byte("world"),
    64  		WithSecondaryKey(0, []byte("bonjour"))))
    65  	require.Nil(t, b.FlushMemtable())
    66  
    67  	for _, ext := range []string{".secondary.0.bloom", ".bloom", ".cna"} {
    68  		files, err := os.ReadDir(dirName)
    69  		require.Nil(t, err)
    70  		fname, ok := findFileWithExt(files, ext)
    71  		require.True(t, ok)
    72  
    73  		err = os.RemoveAll(path.Join(dirName, fname))
    74  		require.Nil(t, err)
    75  
    76  		files, err = os.ReadDir(dirName)
    77  		require.Nil(t, err)
    78  		_, ok = findFileWithExt(files, ext)
    79  		require.False(t, ok, "verify the file is really gone")
    80  	}
    81  
    82  	require.Nil(t, b.Shutdown(ctx))
    83  
    84  	// now identify the segment file and rename it to be a tmp file
    85  	files, err := os.ReadDir(dirName)
    86  	require.Nil(t, err)
    87  	fname, ok := findFileWithExt(files, ".db")
    88  	require.True(t, ok)
    89  
    90  	segmentTmp := path.Join(dirName, fmt.Sprintf("%s.tmp", fname))
    91  	err = os.Rename(path.Join(dirName, fname), segmentTmp)
    92  	require.Nil(t, err)
    93  
    94  	fileNames, err := preComputeSegmentMeta(segmentTmp, 1, logger, true, true)
    95  	require.Nil(t, err)
    96  
    97  	// there should be 4 files and they should all have a .tmp suffix:
    98  	// segment.db.tmp
    99  	// segment.cna.tmp
   100  	// segment.bloom.tmp
   101  	// segment.secondary.0.bloom.tmp
   102  	assert.Len(t, fileNames, 4)
   103  	for _, fName := range fileNames {
   104  		assert.True(t, strings.HasSuffix(fName, ".tmp"))
   105  	}
   106  }
   107  
   108  // Precomputing of segment is almost identical across segment types, however,
   109  // only Replace supports CNA, so we should test at least one other segment type
   110  // which does not support CNA, represented here by using the "Set" type
   111  func precomputeSegmentMeta_Set(ctx context.Context, t *testing.T, opts []BucketOption) {
   112  	// first build a complete reference segment of which we can then strip its
   113  	// meta
   114  	dirName := t.TempDir()
   115  
   116  	logger, _ := test.NewNullLogger()
   117  
   118  	b, err := NewBucket(ctx, dirName, "", logger, nil,
   119  		cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), opts...)
   120  	require.Nil(t, err)
   121  	defer b.Shutdown(ctx)
   122  
   123  	err = b.SetAdd([]byte("greetings"), [][]byte{[]byte("hello"), []byte("hola")})
   124  	require.Nil(t, err)
   125  	require.Nil(t, b.FlushMemtable())
   126  
   127  	files, err := os.ReadDir(dirName)
   128  	require.Nil(t, err)
   129  	fname, ok := findFileWithExt(files, ".bloom")
   130  	require.True(t, ok)
   131  
   132  	err = os.RemoveAll(path.Join(dirName, fname))
   133  	require.Nil(t, err)
   134  
   135  	// verify it's actually gone
   136  	files, err = os.ReadDir(dirName)
   137  	require.Nil(t, err)
   138  	_, ok = findFileWithExt(files, ".bloom")
   139  	require.False(t, ok)
   140  
   141  	require.Nil(t, b.Shutdown(ctx))
   142  
   143  	// now identify the segment file and rename it to be a tmp file
   144  	fname, ok = findFileWithExt(files, ".db")
   145  	require.True(t, ok)
   146  
   147  	segmentTmp := path.Join(dirName, fmt.Sprintf("%s.tmp", fname))
   148  	err = os.Rename(path.Join(dirName, fname), segmentTmp)
   149  	require.Nil(t, err)
   150  
   151  	fileNames, err := preComputeSegmentMeta(segmentTmp, 1, logger, true, true)
   152  	require.Nil(t, err)
   153  
   154  	// there should be 2 files and they should all have a .tmp suffix:
   155  	// segment.db.tmp
   156  	// segment.bloom.tmp
   157  	assert.Len(t, fileNames, 2)
   158  	for _, fName := range fileNames {
   159  		assert.True(t, strings.HasSuffix(fName, ".tmp"))
   160  	}
   161  }
   162  
   163  func TestPrecomputeSegmentMeta_UnhappyPaths(t *testing.T) {
   164  	t.Run("file without .tmp suffix", func(t *testing.T) {
   165  		logger, _ := test.NewNullLogger()
   166  		_, err := preComputeSegmentMeta("a-path-without-the-required-suffix", 7, logger, true, true)
   167  		require.NotNil(t, err)
   168  		assert.Contains(t, err.Error(), "expects a .tmp segment")
   169  	})
   170  
   171  	t.Run("file does not exist", func(t *testing.T) {
   172  		logger, _ := test.NewNullLogger()
   173  		_, err := preComputeSegmentMeta("i-dont-exist.tmp", 7, logger, true, true)
   174  		require.NotNil(t, err)
   175  		unixErr := "no such file or directory"
   176  		windowsErr := "The system cannot find the file specified."
   177  		assert.True(t, strings.Contains(err.Error(), unixErr) || strings.Contains(err.Error(), windowsErr))
   178  	})
   179  
   180  	t.Run("segment header can't be parsed", func(t *testing.T) {
   181  		logger, _ := test.NewNullLogger()
   182  		dirName := t.TempDir()
   183  		segmentName := path.Join(dirName, "my-segment.tmp")
   184  
   185  		header := &segmentindex.Header{
   186  			Version: 100, // only supported version as of writing this test is 0
   187  		}
   188  
   189  		f, err := os.Create(segmentName)
   190  		require.Nil(t, err)
   191  
   192  		_, err = header.WriteTo(f)
   193  		require.Nil(t, err)
   194  
   195  		err = f.Close()
   196  		require.Nil(t, err)
   197  
   198  		_, err = preComputeSegmentMeta(segmentName, 7, logger, true, true)
   199  		require.NotNil(t, err)
   200  		assert.Contains(t, err.Error(), "parse header")
   201  	})
   202  
   203  	t.Run("unsupported strategy", func(t *testing.T) {
   204  		logger, _ := test.NewNullLogger()
   205  		dirName := t.TempDir()
   206  		segmentName := path.Join(dirName, "my-segment.tmp")
   207  
   208  		header := &segmentindex.Header{
   209  			Version:  0,
   210  			Strategy: segmentindex.Strategy(100), // this strategy doesn't exist
   211  		}
   212  
   213  		f, err := os.Create(segmentName)
   214  		require.Nil(t, err)
   215  
   216  		_, err = header.WriteTo(f)
   217  		require.Nil(t, err)
   218  
   219  		err = f.Close()
   220  		require.Nil(t, err)
   221  
   222  		_, err = preComputeSegmentMeta(segmentName, 7, logger, true, true)
   223  		require.NotNil(t, err)
   224  		assert.Contains(t, err.Error(), "unsupported strategy")
   225  	})
   226  }