github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/bucket_threshold_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 14 package lsmkv 15 16 import ( 17 "context" 18 "crypto/rand" 19 "encoding/json" 20 "sync" 21 "testing" 22 "time" 23 24 "github.com/sirupsen/logrus/hooks/test" 25 26 "github.com/stretchr/testify/assert" 27 "github.com/stretchr/testify/require" 28 "github.com/weaviate/weaviate/entities/cyclemanager" 29 ) 30 31 var logger, _ = test.NewNullLogger() 32 33 // This test ensures that the WAL threshold is being adhered to, and that a 34 // flush to segment followed by a switch to a new WAL is being performed 35 // once the threshold is reached 36 func TestWriteAheadLogThreshold_Replace(t *testing.T) { 37 dirName := t.TempDir() 38 39 amount := 100 40 keys := make([][]byte, amount) 41 values := make([][]byte, amount) 42 43 walThreshold := uint64(4096) 44 tolerance := 4. 45 46 flushCallbacks := cyclemanager.NewCallbackGroup("flush", nullLogger(), 1) 47 flushCycle := cyclemanager.NewManager(cyclemanager.MemtableFlushCycleTicker(), flushCallbacks.CycleCallback, logger) 48 flushCycle.Start() 49 50 bucket, err := NewBucket(testCtx(), dirName, "", nullLogger(), nil, 51 cyclemanager.NewCallbackGroupNoop(), flushCallbacks, 52 WithStrategy(StrategyReplace), 53 WithMemtableThreshold(1024*1024*1024), 54 WithWalThreshold(walThreshold)) 55 require.Nil(t, err) 56 57 // generate only a small amount of sequential values. this allows 58 // us to keep the memtable small (the net additions will be close 59 // to zero), and focus on testing the WAL threshold 60 t.Run("generate sequential data", func(t *testing.T) { 61 for i := range keys { 62 n, err := json.Marshal(i) 63 require.Nil(t, err) 64 65 keys[i], values[i] = n, n 66 } 67 }) 68 69 t.Run("check switchover during insertion", func(t *testing.T) { 70 // Importing data for over 10s with 1.6ms break between each object 71 // should result in ~100kB of commitlog data in total. 72 // With couple of flush attempts happening during this 10s period, 73 // and with threshold set to 4kB, first .wal size should be much smaller than 100kb 74 // when commitlog switched to new .wal file. 75 ctxTimeout, cancelTimeout := context.WithTimeout(context.Background(), 10*time.Second) 76 77 wg := &sync.WaitGroup{} 78 wg.Add(1) 79 go func() { 80 for { 81 for i := range keys { 82 if i%100 == 0 && ctxTimeout.Err() != nil { 83 wg.Done() 84 return 85 } 86 assert.Nil(t, bucket.Put(keys[i], values[i])) 87 time.Sleep(1600 * time.Microsecond) 88 } 89 } 90 }() 91 92 var firstWalFile string 93 var firstWalSize int64 94 out: 95 for { 96 time.Sleep(time.Millisecond) 97 if ctxTimeout.Err() != nil { 98 t.Fatalf("Import finished without flushing in the meantime. Size of first WAL file was (%d)", firstWalSize) 99 } 100 101 bucket.flushLock.RLock() 102 walFile := bucket.active.commitlog.path 103 walSize := bucket.active.commitlog.Size() 104 bucket.flushLock.RUnlock() 105 106 if firstWalFile == "" { 107 firstWalFile = walFile 108 } 109 110 if firstWalFile == walFile { 111 firstWalSize = walSize 112 } else { 113 // new path found; flush must have occurred - stop import and exit loop 114 cancelTimeout() 115 break out 116 } 117 } 118 119 wg.Wait() 120 if !isSizeWithinTolerance(t, uint64(firstWalSize), walThreshold, tolerance) { 121 t.Fatalf("WAL size (%d) was allowed to increase beyond threshold (%d) with tolerance of (%f)%%", 122 firstWalSize, walThreshold, tolerance*100) 123 } 124 }) 125 126 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 127 defer cancel() 128 129 require.Nil(t, bucket.Shutdown(ctx)) 130 require.Nil(t, flushCycle.StopAndWait(ctx)) 131 } 132 133 // This test ensures that the Memtable threshold is being adhered to, and 134 // that a flush to segment followed by a switch to a new WAL is being 135 // performed once the threshold is reached 136 func TestMemtableThreshold_Replace(t *testing.T) { 137 dirName := t.TempDir() 138 139 amount := 10000 140 sizePerValue := 8 141 142 keys := make([][]byte, amount) 143 values := make([][]byte, amount) 144 145 memtableThreshold := uint64(4096) 146 tolerance := 4. 147 148 flushCallbacks := cyclemanager.NewCallbackGroup("flush", nullLogger(), 1) 149 flushCycle := cyclemanager.NewManager(cyclemanager.MemtableFlushCycleTicker(), flushCallbacks.CycleCallback, logger) 150 flushCycle.Start() 151 152 bucket, err := NewBucket(testCtx(), dirName, "", nullLogger(), nil, 153 cyclemanager.NewCallbackGroupNoop(), flushCallbacks, 154 WithStrategy(StrategyReplace), 155 WithMemtableThreshold(memtableThreshold)) 156 require.Nil(t, err) 157 158 t.Run("generate random data", func(t *testing.T) { 159 for i := range keys { 160 n, err := json.Marshal(i) 161 require.Nil(t, err) 162 163 keys[i] = n 164 values[i] = make([]byte, sizePerValue) 165 rand.Read(values[i]) 166 } 167 }) 168 169 t.Run("check switchover during insertion", func(t *testing.T) { 170 // Importing data for over 10s with 0.8ms break between each object 171 // should result in ~100kB of memtable data. 172 // With couple of flush attempts happening during this 10s period, 173 // and with threshold set to 4kB, first memtable size should be much smaller than 100kb 174 // when memtable flushed and replaced with new one 175 ctxTimeout, cancelTimeout := context.WithTimeout(context.Background(), 10*time.Second) 176 177 wg := &sync.WaitGroup{} 178 wg.Add(1) 179 go func() { 180 for { 181 for i := range keys { 182 if i%100 == 0 && ctxTimeout.Err() != nil { 183 wg.Done() 184 return 185 } 186 assert.Nil(t, bucket.Put(keys[i], values[i])) 187 time.Sleep(800 * time.Microsecond) 188 } 189 } 190 }() 191 192 var firstMemtablePath string 193 var firstMemtableSize uint64 194 out: 195 for { 196 time.Sleep(time.Millisecond) 197 if ctxTimeout.Err() != nil { 198 t.Fatalf("Import finished without flushing in the meantime. Size of first memtable was (%d)", firstMemtableSize) 199 } 200 201 bucket.flushLock.RLock() 202 activePath := bucket.active.path 203 activeSize := bucket.active.Size() 204 bucket.flushLock.RUnlock() 205 206 if firstMemtablePath == "" { 207 firstMemtablePath = activePath 208 } 209 210 if firstMemtablePath == activePath { 211 firstMemtableSize = activeSize 212 } else { 213 // new path found; flush must have occurred - stop import and exit loop 214 cancelTimeout() 215 break out 216 } 217 } 218 219 wg.Wait() 220 if !isSizeWithinTolerance(t, uint64(firstMemtableSize), memtableThreshold, tolerance) { 221 t.Fatalf("Memtable size (%d) was allowed to increase beyond threshold (%d) with tolerance of (%f)%%", 222 firstMemtableSize, memtableThreshold, tolerance*100) 223 } 224 }) 225 226 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 227 defer cancel() 228 229 require.Nil(t, bucket.Shutdown(ctx)) 230 require.Nil(t, flushCycle.StopAndWait(ctx)) 231 } 232 233 func isSizeWithinTolerance(t *testing.T, detectedSize uint64, threshold uint64, tolerance float64) bool { 234 return detectedSize > 0 && float64(detectedSize) <= float64(threshold)*(tolerance+1) 235 } 236 237 func TestMemtableFlushesIfDirty(t *testing.T) { 238 t.Run("an empty memtable is not flushed", func(t *testing.T) { 239 dirName := t.TempDir() 240 241 flushCallbacks := cyclemanager.NewCallbackGroup("flush", nullLogger(), 1) 242 flushCycle := cyclemanager.NewManager(cyclemanager.MemtableFlushCycleTicker(), flushCallbacks.CycleCallback, logger) 243 flushCycle.Start() 244 245 bucket, err := NewBucket(testCtx(), dirName, "", nullLogger(), nil, 246 cyclemanager.NewCallbackGroupNoop(), flushCallbacks, 247 WithStrategy(StrategyReplace), 248 WithMemtableThreshold(1e12), // large enough to not affect this test 249 WithWalThreshold(1e12), // large enough to not affect this test 250 WithDirtyThreshold(10*time.Millisecond), 251 ) 252 require.Nil(t, err) 253 254 t.Run("assert no segments exist initially", func(t *testing.T) { 255 bucket.disk.maintenanceLock.RLock() 256 defer bucket.disk.maintenanceLock.RUnlock() 257 258 assert.Equal(t, 0, len(bucket.disk.segments)) 259 }) 260 261 t.Run("wait until dirty threshold has passed", func(t *testing.T) { 262 // First flush attempt should occur after ~100ms after creating bucket. 263 // Buffer of 200ms guarantees, flush will be called during sleep period. 264 time.Sleep(200 * time.Millisecond) 265 }) 266 267 t.Run("assert no segments exist even after passing the dirty threshold", func(t *testing.T) { 268 bucket.disk.maintenanceLock.RLock() 269 defer bucket.disk.maintenanceLock.RUnlock() 270 271 assert.Equal(t, 0, len(bucket.disk.segments)) 272 }) 273 274 t.Run("shutdown bucket", func(t *testing.T) { 275 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 276 defer cancel() 277 require.Nil(t, bucket.Shutdown(ctx)) 278 require.Nil(t, flushCycle.StopAndWait(ctx)) 279 }) 280 }) 281 282 t.Run("a dirty memtable is flushed once dirty period has passed with single write", func(t *testing.T) { 283 dirName := t.TempDir() 284 285 flushCallbacks := cyclemanager.NewCallbackGroup("flush", nullLogger(), 1) 286 flushCycle := cyclemanager.NewManager(cyclemanager.MemtableFlushCycleTicker(), flushCallbacks.CycleCallback, logger) 287 flushCycle.Start() 288 289 bucket, err := NewBucket(testCtx(), dirName, "", nullLogger(), nil, 290 cyclemanager.NewCallbackGroupNoop(), flushCallbacks, 291 WithStrategy(StrategyReplace), 292 WithMemtableThreshold(1e12), // large enough to not affect this test 293 WithWalThreshold(1e12), // large enough to not affect this test 294 WithDirtyThreshold(50*time.Millisecond), 295 ) 296 require.Nil(t, err) 297 298 t.Run("import something to make it dirty", func(t *testing.T) { 299 require.Nil(t, bucket.Put([]byte("some-key"), []byte("some-value"))) 300 }) 301 302 t.Run("assert no segments exist initially", func(t *testing.T) { 303 bucket.disk.maintenanceLock.RLock() 304 defer bucket.disk.maintenanceLock.RUnlock() 305 306 assert.Equal(t, 0, len(bucket.disk.segments)) 307 }) 308 309 t.Run("wait until dirty threshold has passed", func(t *testing.T) { 310 // First flush attempt should occur after ~100ms after creating bucket. 311 // Buffer of 200ms guarantees, flush will be called during sleep period. 312 time.Sleep(200 * time.Millisecond) 313 }) 314 315 t.Run("assert that a flush has occurred (and one segment exists)", func(t *testing.T) { 316 bucket.disk.maintenanceLock.RLock() 317 defer bucket.disk.maintenanceLock.RUnlock() 318 319 assert.Equal(t, 1, len(bucket.disk.segments)) 320 }) 321 322 t.Run("shutdown bucket", func(t *testing.T) { 323 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 324 defer cancel() 325 require.Nil(t, bucket.Shutdown(ctx)) 326 require.Nil(t, flushCycle.StopAndWait(ctx)) 327 }) 328 }) 329 330 t.Run("a dirty memtable is flushed once dirty period has passed with ongoing writes", func(t *testing.T) { 331 dirName := t.TempDir() 332 333 flushCallbacks := cyclemanager.NewCallbackGroup("flush", nullLogger(), 1) 334 flushCycle := cyclemanager.NewManager(cyclemanager.MemtableFlushCycleTicker(), flushCallbacks.CycleCallback, logger) 335 flushCycle.Start() 336 337 bucket, err := NewBucket(testCtx(), dirName, "", nullLogger(), nil, 338 cyclemanager.NewCallbackGroupNoop(), flushCallbacks, 339 WithStrategy(StrategyReplace), 340 WithMemtableThreshold(1e12), // large enough to not affect this test 341 WithWalThreshold(1e12), // large enough to not affect this test 342 WithDirtyThreshold(50*time.Millisecond), 343 ) 344 require.Nil(t, err) 345 346 t.Run("import something to make it dirty", func(t *testing.T) { 347 require.Nil(t, bucket.Put([]byte("some-key"), []byte("some-value"))) 348 }) 349 350 t.Run("assert no segments exist initially", func(t *testing.T) { 351 bucket.disk.maintenanceLock.RLock() 352 defer bucket.disk.maintenanceLock.RUnlock() 353 354 assert.Equal(t, 0, len(bucket.disk.segments)) 355 }) 356 357 t.Run("keep importing crossing the dirty threshold", func(t *testing.T) { 358 rounds := 12 // at least 300ms 359 data := make([]byte, rounds*4) 360 _, err := rand.Read(data) 361 require.Nil(t, err) 362 363 for i := 0; i < rounds; i++ { 364 key := data[(i * 4) : (i+1)*4] 365 bucket.Put(key, []byte("value")) 366 time.Sleep(25 * time.Millisecond) 367 } 368 }) 369 370 t.Run("assert that flush has occurred in the meantime", func(t *testing.T) { 371 bucket.disk.maintenanceLock.RLock() 372 defer bucket.disk.maintenanceLock.RUnlock() 373 374 // at least 2 segments should be created already 375 assert.GreaterOrEqual(t, len(bucket.disk.segments), 2) 376 }) 377 378 t.Run("shutdown bucket", func(t *testing.T) { 379 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 380 defer cancel() 381 require.Nil(t, bucket.Shutdown(ctx)) 382 require.Nil(t, flushCycle.StopAndWait(ctx)) 383 }) 384 }) 385 }