github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/compaction_integration2_test.go

//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package lsmkv

import (
	"context"
	"fmt"
	"math/rand"
	"testing"

	"github.com/sirupsen/logrus"
	"github.com/sirupsen/logrus/hooks/test"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/weaviate/weaviate/entities/cyclemanager"
)

// TestCompactionReplaceStrategyStraggler imports data across three segments
// (an odd number, so one segment is left over as a "straggler" during pairwise
// compaction) and verifies that compacting until no longer eligible still
// converges to the expected state.
func TestCompactionReplaceStrategyStraggler(t *testing.T) {
	opts := []BucketOption{WithStrategy(StrategyReplace)}
	size := 200

	type kv struct {
		key    []byte
		value  []byte
		delete bool
	}

	var segment1 []kv
	var segment2 []kv
	var segment3 []kv
	var expected []kv
	var bucket *Bucket

	dirName := t.TempDir()

	t.Run("create test data", func(t *testing.T) {
		// The test data is split into 4 scenarios evenly:
		//
		// 1.) created in the first segment, never touched again
		// 2.) created in the first segment, updated in the second and again in the third
		// 3.) created in the first segment, deleted in the third
		// 4.) not present in the first two segments, created in the third
		for i := 0; i < size; i++ {
			key := []byte(fmt.Sprintf("key-%3d", i))
			originalValue := []byte(fmt.Sprintf("value-%3d-original", i))

			switch i % 4 {
			case 0:
				// add to segment 1
				segment1 = append(segment1, kv{
					key:   key,
					value: originalValue,
				})

				// leave this element untouched in the following segments
				expected = append(expected, kv{
					key:   key,
					value: originalValue,
				})
			case 1:
				// add to segment 1
				segment1 = append(segment1, kv{
					key:   key,
					value: originalValue,
				})

				// update in the second segment
				updatedValue := []byte(fmt.Sprintf("value-%3d-updated", i))
				segment2 = append(segment2, kv{
					key:   key,
					value: updatedValue,
				})

				// update again in the third segment
				updatedValue = []byte(fmt.Sprintf("value-%3d-updated-twice", i))
				segment3 = append(segment3, kv{
					key:   key,
					value: updatedValue,
				})

				expected = append(expected, kv{
					key:   key,
					value: updatedValue,
				})
			case 2:
				// add to segment 1
				segment1 = append(segment1, kv{
					key:   key,
					value: originalValue,
				})

				// delete in the third segment
				segment3 = append(segment3, kv{
					key:    key,
					delete: true,
				})

				// do not add to expected at all

			case 3:
				// do not add to segments 1 and 2

				// only add to segment 3 (this key's first entry)
				segment3 = append(segment3, kv{
					key:   key,
					value: originalValue,
				})

				expected = append(expected, kv{
					key:   key,
					value: originalValue,
				})
			}
		}
	})

	t.Run("shuffle the import order for each segment", func(t *testing.T) {
		// this is to make sure we don't accidentally rely on the import order
		rand.Shuffle(len(segment1), func(i, j int) {
			segment1[i], segment1[j] = segment1[j], segment1[i]
		})
		rand.Shuffle(len(segment2), func(i, j int) {
			segment2[i], segment2[j] = segment2[j], segment2[i]
		})
		rand.Shuffle(len(segment3), func(i, j int) {
			segment3[i], segment3[j] = segment3[j], segment3[i]
		})
	})

	t.Run("init bucket", func(t *testing.T) {
		b, err := NewBucket(context.TODO(), dirName, "", nullLogger2(), nil,
			cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), opts...)
		require.Nil(t, err)

		// so big it effectively never triggers as part of this test
		b.SetMemtableThreshold(1e9)

		bucket = b
	})
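	// Note on the steps below: each import fills the memtable and the following
	// FlushAndSwitch persists it, so the bucket holds three on-disk segments
	// before compaction. The "compact until no longer eligible" step then calls
	// compactOnce in a loop; presumably each call merges one pair of segments,
	// so with an odd segment count one segment is initially left over as the
	// straggler this test is named after, and more than one pass is needed.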
	t.Run("import segment 1", func(t *testing.T) {
		for _, pair := range segment1 {
			if !pair.delete {
				err := bucket.Put(pair.key, pair.value)
				require.Nil(t, err)
			} else {
				err := bucket.Delete(pair.key)
				require.Nil(t, err)
			}
		}
	})

	t.Run("flush to disk", func(t *testing.T) {
		require.Nil(t, bucket.FlushAndSwitch())
	})

	t.Run("import segment 2", func(t *testing.T) {
		for _, pair := range segment2 {
			if !pair.delete {
				err := bucket.Put(pair.key, pair.value)
				require.Nil(t, err)
			} else {
				err := bucket.Delete(pair.key)
				require.Nil(t, err)
			}
		}
	})

	t.Run("flush to disk", func(t *testing.T) {
		require.Nil(t, bucket.FlushAndSwitch())
	})

	t.Run("import segment 3", func(t *testing.T) {
		for _, pair := range segment3 {
			if !pair.delete {
				err := bucket.Put(pair.key, pair.value)
				require.Nil(t, err)
			} else {
				err := bucket.Delete(pair.key)
				require.Nil(t, err)
			}
		}
	})

	t.Run("flush to disk", func(t *testing.T) {
		require.Nil(t, bucket.FlushAndSwitch())
	})

	t.Run("verify control before compaction", func(t *testing.T) {
		var retrieved []kv

		c := bucket.Cursor()
		defer c.Close()

		for k, v := c.First(); k != nil; k, v = c.Next() {
			keyCopy := copyByteSlice2(k)
			valueCopy := copyByteSlice2(v)
			retrieved = append(retrieved, kv{
				key:   keyCopy,
				value: valueCopy,
			})
		}

		assert.Equal(t, expected, retrieved)
	})

	t.Run("verify count control before compaction", func(t *testing.T) {
		assert.Equal(t, len(expected), bucket.Count())
	})

	t.Run("compact until no longer eligible", func(t *testing.T) {
		var compacted bool
		var err error
		for compacted, err = bucket.disk.compactOnce(); err == nil && compacted; compacted, err = bucket.disk.compactOnce() {
		}
		require.Nil(t, err)
	})

	t.Run("verify control after compaction", func(t *testing.T) {
		var retrieved []kv

		c := bucket.Cursor()
		defer c.Close()

		for k, v := c.First(); k != nil; k, v = c.Next() {
			keyCopy := copyByteSlice2(k)
			valueCopy := copyByteSlice2(v)
			retrieved = append(retrieved, kv{
				key:   keyCopy,
				value: valueCopy,
			})
		}

		assert.Equal(t, expected, retrieved)
	})

	t.Run("verify control using individual get operations", func(t *testing.T) {
		for _, pair := range expected {
			retrieved, err := bucket.Get(pair.key)
			require.NoError(t, err)

			assert.Equal(t, pair.value, retrieved)
		}
	})

	t.Run("verify count after compaction", func(t *testing.T) {
		assert.Equal(t, len(expected), bucket.Count())
	})
}

func nullLogger2() logrus.FieldLogger {
	log, _ := test.NewNullLogger()
	return log
}

func copyByteSlice2(src []byte) []byte {
	dst := make([]byte, len(src))
	copy(dst, src)
	return dst
}