github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segment_precompute_for_compation_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package lsmkv 13 14 import ( 15 "context" 16 "fmt" 17 "os" 18 "path" 19 "strings" 20 "testing" 21 22 "github.com/sirupsen/logrus/hooks/test" 23 "github.com/stretchr/testify/assert" 24 "github.com/stretchr/testify/require" 25 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex" 26 "github.com/weaviate/weaviate/entities/cyclemanager" 27 ) 28 29 func TestPrecomputeForCompaction(t *testing.T) { 30 ctx := context.Background() 31 tests := bucketTests{ 32 { 33 name: "precomputeSegmentMeta_Replace", 34 f: precomputeSegmentMeta_Replace, 35 opts: []BucketOption{ 36 WithStrategy(StrategyReplace), 37 WithSecondaryIndices(1), 38 }, 39 }, 40 { 41 name: "precomputeSegmentMeta_Set", 42 f: precomputeSegmentMeta_Set, 43 opts: []BucketOption{ 44 WithStrategy(StrategySetCollection), 45 }, 46 }, 47 } 48 tests.run(ctx, t) 49 } 50 51 func precomputeSegmentMeta_Replace(ctx context.Context, t *testing.T, opts []BucketOption) { 52 // first build a complete reference segment of which we can then strip its 53 // meta 54 dirName := t.TempDir() 55 56 logger, _ := test.NewNullLogger() 57 58 b, err := NewBucket(ctx, dirName, "", logger, nil, 59 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), opts...) 60 require.Nil(t, err) 61 defer b.Shutdown(ctx) 62 63 require.Nil(t, b.Put([]byte("hello"), []byte("world"), 64 WithSecondaryKey(0, []byte("bonjour")))) 65 require.Nil(t, b.FlushMemtable()) 66 67 for _, ext := range []string{".secondary.0.bloom", ".bloom", ".cna"} { 68 files, err := os.ReadDir(dirName) 69 require.Nil(t, err) 70 fname, ok := findFileWithExt(files, ext) 71 require.True(t, ok) 72 73 err = os.RemoveAll(path.Join(dirName, fname)) 74 require.Nil(t, err) 75 76 files, err = os.ReadDir(dirName) 77 require.Nil(t, err) 78 _, ok = findFileWithExt(files, ext) 79 require.False(t, ok, "verify the file is really gone") 80 } 81 82 require.Nil(t, b.Shutdown(ctx)) 83 84 // now identify the segment file and rename it to be a tmp file 85 files, err := os.ReadDir(dirName) 86 require.Nil(t, err) 87 fname, ok := findFileWithExt(files, ".db") 88 require.True(t, ok) 89 90 segmentTmp := path.Join(dirName, fmt.Sprintf("%s.tmp", fname)) 91 err = os.Rename(path.Join(dirName, fname), segmentTmp) 92 require.Nil(t, err) 93 94 fileNames, err := preComputeSegmentMeta(segmentTmp, 1, logger, true, true) 95 require.Nil(t, err) 96 97 // there should be 4 files and they should all have a .tmp suffix: 98 // segment.db.tmp 99 // segment.cna.tmp 100 // segment.bloom.tmp 101 // segment.secondary.0.bloom.tmp 102 assert.Len(t, fileNames, 4) 103 for _, fName := range fileNames { 104 assert.True(t, strings.HasSuffix(fName, ".tmp")) 105 } 106 } 107 108 // Precomputing of segment is almost identical across segment types, however, 109 // only Replace supports CNA, so we should test at least one other segment type 110 // which does not support CNA, represented here by using the "Set" type 111 func precomputeSegmentMeta_Set(ctx context.Context, t *testing.T, opts []BucketOption) { 112 // first build a complete reference segment of which we can then strip its 113 // meta 114 dirName := t.TempDir() 115 116 logger, _ := test.NewNullLogger() 117 118 b, err := NewBucket(ctx, dirName, "", logger, nil, 119 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), opts...) 120 require.Nil(t, err) 121 defer b.Shutdown(ctx) 122 123 err = b.SetAdd([]byte("greetings"), [][]byte{[]byte("hello"), []byte("hola")}) 124 require.Nil(t, err) 125 require.Nil(t, b.FlushMemtable()) 126 127 files, err := os.ReadDir(dirName) 128 require.Nil(t, err) 129 fname, ok := findFileWithExt(files, ".bloom") 130 require.True(t, ok) 131 132 err = os.RemoveAll(path.Join(dirName, fname)) 133 require.Nil(t, err) 134 135 // verify it's actually gone 136 files, err = os.ReadDir(dirName) 137 require.Nil(t, err) 138 _, ok = findFileWithExt(files, ".bloom") 139 require.False(t, ok) 140 141 require.Nil(t, b.Shutdown(ctx)) 142 143 // now identify the segment file and rename it to be a tmp file 144 fname, ok = findFileWithExt(files, ".db") 145 require.True(t, ok) 146 147 segmentTmp := path.Join(dirName, fmt.Sprintf("%s.tmp", fname)) 148 err = os.Rename(path.Join(dirName, fname), segmentTmp) 149 require.Nil(t, err) 150 151 fileNames, err := preComputeSegmentMeta(segmentTmp, 1, logger, true, true) 152 require.Nil(t, err) 153 154 // there should be 2 files and they should all have a .tmp suffix: 155 // segment.db.tmp 156 // segment.bloom.tmp 157 assert.Len(t, fileNames, 2) 158 for _, fName := range fileNames { 159 assert.True(t, strings.HasSuffix(fName, ".tmp")) 160 } 161 } 162 163 func TestPrecomputeSegmentMeta_UnhappyPaths(t *testing.T) { 164 t.Run("file without .tmp suffix", func(t *testing.T) { 165 logger, _ := test.NewNullLogger() 166 _, err := preComputeSegmentMeta("a-path-without-the-required-suffix", 7, logger, true, true) 167 require.NotNil(t, err) 168 assert.Contains(t, err.Error(), "expects a .tmp segment") 169 }) 170 171 t.Run("file does not exist", func(t *testing.T) { 172 logger, _ := test.NewNullLogger() 173 _, err := preComputeSegmentMeta("i-dont-exist.tmp", 7, logger, true, true) 174 require.NotNil(t, err) 175 unixErr := "no such file or directory" 176 windowsErr := "The system cannot find the file specified." 177 assert.True(t, strings.Contains(err.Error(), unixErr) || strings.Contains(err.Error(), windowsErr)) 178 }) 179 180 t.Run("segment header can't be parsed", func(t *testing.T) { 181 logger, _ := test.NewNullLogger() 182 dirName := t.TempDir() 183 segmentName := path.Join(dirName, "my-segment.tmp") 184 185 header := &segmentindex.Header{ 186 Version: 100, // only supported version as of writing this test is 0 187 } 188 189 f, err := os.Create(segmentName) 190 require.Nil(t, err) 191 192 _, err = header.WriteTo(f) 193 require.Nil(t, err) 194 195 err = f.Close() 196 require.Nil(t, err) 197 198 _, err = preComputeSegmentMeta(segmentName, 7, logger, true, true) 199 require.NotNil(t, err) 200 assert.Contains(t, err.Error(), "parse header") 201 }) 202 203 t.Run("unsupported strategy", func(t *testing.T) { 204 logger, _ := test.NewNullLogger() 205 dirName := t.TempDir() 206 segmentName := path.Join(dirName, "my-segment.tmp") 207 208 header := &segmentindex.Header{ 209 Version: 0, 210 Strategy: segmentindex.Strategy(100), // this strategy doesn't exist 211 } 212 213 f, err := os.Create(segmentName) 214 require.Nil(t, err) 215 216 _, err = header.WriteTo(f) 217 require.Nil(t, err) 218 219 err = f.Close() 220 require.Nil(t, err) 221 222 _, err = preComputeSegmentMeta(segmentName, 7, logger, true, true) 223 require.NotNil(t, err) 224 assert.Contains(t, err.Error(), "unsupported strategy") 225 }) 226 }