github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/recover_from_wal_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 // +build integrationTest 14 15 package lsmkv 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "io" 22 "os" 23 "os/exec" 24 "path/filepath" 25 "testing" 26 27 "github.com/stretchr/testify/assert" 28 "github.com/stretchr/testify/require" 29 "github.com/weaviate/weaviate/entities/cyclemanager" 30 ) 31 32 func TestReplaceStrategy_RecoverFromWAL(t *testing.T) { 33 dirNameOriginal := t.TempDir() 34 dirNameRecovered := t.TempDir() 35 36 t.Run("with some previous state", func(t *testing.T) { 37 b, err := NewBucket(testCtx(), dirNameOriginal, "", nullLogger(), nil, 38 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 39 WithStrategy(StrategyReplace)) 40 require.Nil(t, err) 41 42 // so big it effectively never triggers as part of this test 43 b.SetMemtableThreshold(1e9) 44 45 t.Run("set one key that will be flushed orderly", func(t *testing.T) { 46 // the motivation behind flushing this initial segment is to check that 47 // deletion as part of the recovery also works correctly. If we would 48 // just delete something that was created as part of the same memtable, 49 // the tests would still pass, even with removing the logic that recovers 50 // tombstones. 51 // 52 // To make sure they fail in this case, this prior state was introduced. 53 // An entry with key "key-2" is introduced in a previous segment, so if 54 // the deletion fails as part of the recovery this key would still be 55 // present later on. With the deletion working correctly it will be gone. 56 // 57 // You can test this by commenting the "p.memtable.setTombstone()" line 58 // in p.doReplace(). This will fail the tests suite, but prior to this 59 // addition it would have passed. 60 key2 := []byte("key-2") 61 orig2 := []byte("delete me later - you should never find me again") 62 63 err = b.Put(key2, orig2) 64 require.Nil(t, err) 65 }) 66 67 t.Run("shutdown (orderly) bucket to create first segment", func(t *testing.T) { 68 b.Shutdown(context.Background()) 69 70 // then recreate bucket 71 var err error 72 b, err = NewBucket(testCtx(), dirNameOriginal, "", nullLogger(), nil, 73 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 74 WithStrategy(StrategyReplace)) 75 require.Nil(t, err) 76 }) 77 78 t.Run("set original values", func(t *testing.T) { 79 key1 := []byte("key-1") 80 key2 := []byte("key-2") 81 key3 := []byte("key-3") 82 orig1 := []byte("original value for key1") 83 orig2 := []byte("original value for key2") 84 orig3 := []byte("original value for key3") 85 86 err = b.Put(key1, orig1) 87 require.Nil(t, err) 88 err = b.Put(key2, orig2) 89 require.Nil(t, err) 90 err = b.Put(key3, orig3) 91 require.Nil(t, err) 92 }) 93 94 t.Run("delete one, update one", func(t *testing.T) { 95 key2 := []byte("key-2") 96 key3 := []byte("key-3") 97 updated3 := []byte("updated value for key 3") 98 99 err = b.Delete(key2) 100 require.Nil(t, err) 101 102 err = b.Put(key3, updated3) 103 require.Nil(t, err) 104 }) 105 106 t.Run("verify control", func(t *testing.T) { 107 key1 := []byte("key-1") 108 key2 := []byte("key-2") 109 key3 := []byte("key-3") 110 orig1 := []byte("original value for key1") 111 updated3 := []byte("updated value for key 3") 112 res, err := b.Get(key1) 113 require.Nil(t, err) 114 assert.Equal(t, res, orig1) 115 res, err = b.Get(key2) 116 require.Nil(t, err) 117 assert.Nil(t, res) 118 res, err = b.Get(key3) 119 require.Nil(t, err) 120 assert.Equal(t, res, updated3) 121 }) 122 123 t.Run("make sure the WAL is flushed", func(t *testing.T) { 124 require.Nil(t, b.WriteWAL()) 125 }) 126 127 t.Run("copy state into recovery folder and destroy original", func(t *testing.T) { 128 t.Run("copy over wals", func(t *testing.T) { 129 cmd := exec.Command("/bin/bash", "-c", fmt.Sprintf("cp -r %s/*.wal %s", 130 dirNameOriginal, dirNameRecovered)) 131 var out bytes.Buffer 132 cmd.Stderr = &out 133 err := cmd.Run() 134 if err != nil { 135 fmt.Println(out.String()) 136 t.Fatal(err) 137 } 138 }) 139 140 t.Run("copy over segments", func(t *testing.T) { 141 cmd := exec.Command("/bin/bash", "-c", fmt.Sprintf("cp -r %s/*.db %s", 142 dirNameOriginal, dirNameRecovered)) 143 var out bytes.Buffer 144 cmd.Stderr = &out 145 err := cmd.Run() 146 if err != nil { 147 fmt.Println(out.String()) 148 t.Fatal(err) 149 } 150 }) 151 b = nil 152 require.Nil(t, os.RemoveAll(dirNameOriginal)) 153 }) 154 155 var bRec *Bucket 156 157 t.Run("create new bucket from existing state", func(t *testing.T) { 158 b, err := NewBucket(testCtx(), dirNameRecovered, "", nullLogger(), nil, 159 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 160 WithStrategy(StrategyReplace)) 161 require.Nil(t, err) 162 163 // so big it effectively never triggers as part of this test 164 b.SetMemtableThreshold(1e9) 165 166 bRec = b 167 }) 168 169 t.Run("verify all data is present", func(t *testing.T) { 170 key1 := []byte("key-1") 171 key2 := []byte("key-2") 172 key3 := []byte("key-3") 173 orig1 := []byte("original value for key1") 174 updated3 := []byte("updated value for key 3") 175 res, err := bRec.Get(key1) 176 require.Nil(t, err) 177 assert.Equal(t, res, orig1) 178 res, err = bRec.Get(key2) 179 require.Nil(t, err) 180 assert.Nil(t, res) 181 res, err = bRec.Get(key3) 182 require.Nil(t, err) 183 assert.Equal(t, res, updated3) 184 }) 185 }) 186 } 187 188 func TestReplaceStrategy_RecoverFromWALWithCorruptLastElement(t *testing.T) { 189 dirNameOriginal := t.TempDir() 190 dirNameRecovered := t.TempDir() 191 192 t.Run("without previous state", func(t *testing.T) { 193 b, err := NewBucket(testCtx(), dirNameOriginal, "", nullLogger(), nil, 194 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 195 WithStrategy(StrategyReplace)) 196 require.Nil(t, err) 197 198 // so big it effectively never triggers as part of this test 199 b.SetMemtableThreshold(1e9) 200 201 t.Run("set original values", func(t *testing.T) { 202 key1 := []byte("key-1") 203 key2 := []byte("key-2") 204 key3 := []byte("key-3") 205 orig1 := []byte("original value for key1") 206 orig2 := []byte("original value for key2") 207 orig3 := []byte("original value for key3") 208 209 err = b.Put(key1, orig1) 210 require.Nil(t, err) 211 err = b.Put(key2, orig2) 212 require.Nil(t, err) 213 err = b.Put(key3, orig3) 214 require.Nil(t, err) 215 }) 216 217 t.Run("delete one, update one", func(t *testing.T) { 218 key2 := []byte("key-2") 219 key3 := []byte("key-3") 220 updated3 := []byte("updated value for key 3") 221 222 err = b.Delete(key2) 223 require.Nil(t, err) 224 225 err = b.Put(key3, updated3) 226 require.Nil(t, err) 227 }) 228 229 t.Run("verify control", func(t *testing.T) { 230 key1 := []byte("key-1") 231 key2 := []byte("key-2") 232 key3 := []byte("key-3") 233 orig1 := []byte("original value for key1") 234 updated3 := []byte("updated value for key 3") 235 res, err := b.Get(key1) 236 require.Nil(t, err) 237 assert.Equal(t, res, orig1) 238 res, err = b.Get(key2) 239 require.Nil(t, err) 240 assert.Nil(t, res) 241 res, err = b.Get(key3) 242 require.Nil(t, err) 243 assert.Equal(t, res, updated3) 244 }) 245 246 t.Run("make sure the WAL is flushed", func(t *testing.T) { 247 require.Nil(t, b.WriteWAL()) 248 }) 249 250 t.Run("copy state into recovery folder and destroy original", func(t *testing.T) { 251 cmd := exec.Command("/bin/bash", "-c", fmt.Sprintf("cp -r %s/*.wal %s", 252 dirNameOriginal, dirNameRecovered)) 253 var out bytes.Buffer 254 cmd.Stderr = &out 255 err := cmd.Run() 256 if err != nil { 257 fmt.Println(out.String()) 258 t.Fatal(err) 259 } 260 b = nil 261 require.Nil(t, os.RemoveAll(dirNameOriginal)) 262 }) 263 264 t.Run("corrupt WAL by removing some bytes at the very end", func(t *testing.T) { 265 entries, err := os.ReadDir(dirNameRecovered) 266 require.Nil(t, err) 267 require.Len(t, entries, 1, "there should be exactly one .wal file") 268 269 oldFileName := filepath.Join(dirNameRecovered, entries[0].Name()) 270 tmpFileName := oldFileName + ".tmp" 271 272 err = os.Rename(oldFileName, tmpFileName) 273 require.Nil(t, err) 274 275 orig, err := os.Open(tmpFileName) 276 require.Nil(t, err) 277 278 correctLog, err := io.ReadAll(orig) 279 require.Nil(t, err) 280 err = orig.Close() 281 require.Nil(t, err) 282 283 corruptLog := correctLog[:len(correctLog)-6] 284 285 err = os.Remove(tmpFileName) 286 require.Nil(t, err) 287 288 corrupt, err := os.Create(oldFileName) 289 require.Nil(t, err) 290 291 _, err = corrupt.Write(corruptLog) 292 require.Nil(t, err) 293 294 err = corrupt.Close() 295 require.Nil(t, err) 296 }) 297 298 var bRec *Bucket 299 300 t.Run("create new bucket from existing state", func(t *testing.T) { 301 b, err := NewBucket(testCtx(), dirNameRecovered, "", nullLogger(), nil, 302 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 303 WithStrategy(StrategyReplace)) 304 require.Nil(t, err) 305 306 // so big it effectively never triggers as part of this test 307 b.SetMemtableThreshold(1e9) 308 309 bRec = b 310 }) 311 312 t.Run("verify all data prior to the corruption is present", func(t *testing.T) { 313 key1 := []byte("key-1") 314 key2 := []byte("key-2") 315 key3 := []byte("key-3") 316 orig1 := []byte("original value for key1") 317 notUpdated3 := []byte("original value for key3") 318 319 // the last operation we performed (that now got corrupted) was an update 320 // on key3. So now that we're expecting all state prior to the corruption 321 // to be present, we would expect the original value for key3 322 323 res, err := bRec.Get(key1) 324 require.Nil(t, err) 325 assert.Equal(t, res, orig1) 326 res, err = bRec.Get(key2) 327 require.Nil(t, err) 328 assert.Nil(t, res) 329 res, err = bRec.Get(key3) 330 require.Nil(t, err) 331 assert.Equal(t, res, notUpdated3) 332 }) 333 }) 334 } 335 336 func TestSetStrategy_RecoverFromWAL(t *testing.T) { 337 dirNameOriginal := t.TempDir() 338 dirNameRecovered := t.TempDir() 339 340 t.Run("without prior state", func(t *testing.T) { 341 b, err := NewBucket(testCtx(), dirNameOriginal, "", nullLogger(), nil, 342 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 343 WithStrategy(StrategySetCollection)) 344 require.Nil(t, err) 345 346 // so big it effectively never triggers as part of this test 347 b.SetMemtableThreshold(1e9) 348 349 key1 := []byte("test1-key-1") 350 key2 := []byte("test1-key-2") 351 key3 := []byte("test1-key-3") 352 353 t.Run("set original values and verify", func(t *testing.T) { 354 orig1 := [][]byte{[]byte("value 1.1"), []byte("value 1.2")} 355 orig2 := [][]byte{[]byte("value 2.1"), []byte("value 2.2")} 356 orig3 := [][]byte{[]byte("value 3.1"), []byte("value 3.2")} 357 358 err = b.SetAdd(key1, orig1) 359 require.Nil(t, err) 360 err = b.SetAdd(key2, orig2) 361 require.Nil(t, err) 362 err = b.SetAdd(key3, orig3) 363 require.Nil(t, err) 364 365 res, err := b.SetList(key1) 366 require.Nil(t, err) 367 assert.Equal(t, orig1, res) 368 res, err = b.SetList(key2) 369 require.Nil(t, err) 370 assert.Equal(t, orig2, res) 371 res, err = b.SetList(key3) 372 require.Nil(t, err) 373 assert.Equal(t, orig3, res) 374 }) 375 376 t.Run("delete individual keys", func(t *testing.T) { 377 delete2 := []byte("value 2.1") 378 delete3 := []byte("value 3.2") 379 380 err = b.SetDeleteSingle(key2, delete2) 381 require.Nil(t, err) 382 err = b.SetDeleteSingle(key3, delete3) 383 require.Nil(t, err) 384 }) 385 386 t.Run("re-add keys which were previously deleted and new ones", func(t *testing.T) { 387 readd2 := [][]byte{[]byte("value 2.1"), []byte("value 2.3")} 388 readd3 := [][]byte{[]byte("value 3.2"), []byte("value 3.3")} 389 390 err = b.SetAdd(key2, readd2) 391 require.Nil(t, err) 392 err = b.SetAdd(key3, readd3) 393 require.Nil(t, err) 394 }) 395 396 t.Run("validate the results prior to recovery", func(t *testing.T) { 397 expected1 := [][]byte{[]byte("value 1.1"), []byte("value 1.2")} // unchanged 398 expected2 := [][]byte{ 399 []byte("value 2.2"), // from original import 400 []byte("value 2.1"), // added again after initial deletion 401 []byte("value 2.3"), // newly added 402 } 403 expected3 := [][]byte{ 404 []byte("value 3.1"), // form original import 405 []byte("value 3.2"), // added again after initial deletion 406 []byte("value 3.3"), // newly added 407 } // value2 deleted 408 409 res, err := b.SetList(key1) 410 require.Nil(t, err) 411 assert.Equal(t, expected1, res) 412 res, err = b.SetList(key2) 413 require.Nil(t, err) 414 assert.Equal(t, expected2, res) 415 res, err = b.SetList(key3) 416 require.Nil(t, err) 417 assert.Equal(t, expected3, res) 418 }) 419 420 t.Run("make sure the WAL is flushed", func(t *testing.T) { 421 require.Nil(t, b.WriteWAL()) 422 }) 423 424 t.Run("copy state into recovery folder and destroy original", func(t *testing.T) { 425 cmd := exec.Command("/bin/bash", "-c", fmt.Sprintf("cp -r %s/*.wal %s", 426 dirNameOriginal, dirNameRecovered)) 427 var out bytes.Buffer 428 cmd.Stderr = &out 429 err := cmd.Run() 430 if err != nil { 431 fmt.Println(out.String()) 432 t.Fatal(err) 433 } 434 b = nil 435 require.Nil(t, os.RemoveAll(dirNameOriginal)) 436 }) 437 438 var bRec *Bucket 439 440 t.Run("create new bucket from existing state", func(t *testing.T) { 441 b, err := NewBucket(testCtx(), dirNameRecovered, "", nullLogger(), nil, 442 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 443 WithStrategy(StrategySetCollection)) 444 require.Nil(t, err) 445 446 // so big it effectively never triggers as part of this test 447 b.SetMemtableThreshold(1e9) 448 449 bRec = b 450 }) 451 452 t.Run("validate the results after recovery", func(t *testing.T) { 453 expected1 := [][]byte{[]byte("value 1.1"), []byte("value 1.2")} // unchanged 454 expected2 := [][]byte{ 455 []byte("value 2.2"), // from original import 456 []byte("value 2.1"), // added again after initial deletion 457 []byte("value 2.3"), // newly added 458 } 459 expected3 := [][]byte{ 460 []byte("value 3.1"), // form original import 461 []byte("value 3.2"), // added again after initial deletion 462 []byte("value 3.3"), // newly added 463 } // value2 deleted 464 465 res, err := bRec.SetList(key1) 466 require.Nil(t, err) 467 assert.Equal(t, expected1, res) 468 res, err = bRec.SetList(key2) 469 require.Nil(t, err) 470 assert.Equal(t, expected2, res) 471 res, err = bRec.SetList(key3) 472 require.Nil(t, err) 473 assert.Equal(t, expected3, res) 474 }) 475 }) 476 } 477 478 func TestMapStrategy_RecoverFromWAL(t *testing.T) { 479 dirNameOriginal := t.TempDir() 480 dirNameRecovered := t.TempDir() 481 482 t.Run("without prior state", func(t *testing.T) { 483 b, err := NewBucket(testCtx(), dirNameOriginal, "", nullLogger(), nil, 484 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 485 WithStrategy(StrategyMapCollection)) 486 require.Nil(t, err) 487 488 // so big it effectively never triggers as part of this test 489 b.SetMemtableThreshold(1e9) 490 491 rowKey1 := []byte("test1-key-1") 492 rowKey2 := []byte("test1-key-2") 493 494 t.Run("set original values and verify", func(t *testing.T) { 495 row1Map := []MapPair{ 496 { 497 Key: []byte("row1-key1"), 498 Value: []byte("row1-key1-value1"), 499 }, { 500 Key: []byte("row1-key2"), 501 Value: []byte("row1-key2-value1"), 502 }, 503 } 504 505 row2Map := []MapPair{ 506 { 507 Key: []byte("row2-key1"), 508 Value: []byte("row2-key1-value1"), 509 }, { 510 Key: []byte("row2-key2"), 511 Value: []byte("row2-key2-value1"), 512 }, 513 } 514 515 for _, pair := range row1Map { 516 err = b.MapSet(rowKey1, pair) 517 require.Nil(t, err) 518 } 519 520 for _, pair := range row2Map { 521 err = b.MapSet(rowKey2, pair) 522 require.Nil(t, err) 523 } 524 525 res, err := b.MapList(rowKey1) 526 require.Nil(t, err) 527 assert.Equal(t, row1Map, res) 528 res, err = b.MapList(rowKey2) 529 require.Nil(t, err) 530 assert.Equal(t, res, row2Map) 531 }) 532 533 t.Run("replace an existing map key", func(t *testing.T) { 534 err = b.MapSet(rowKey1, MapPair{ 535 Key: []byte("row1-key1"), // existing key 536 Value: []byte("row1-key1-value2"), // updated value 537 }) 538 require.Nil(t, err) 539 540 row1Updated := []MapPair{ 541 { 542 Key: []byte("row1-key1"), 543 Value: []byte("row1-key1-value2"), // <--- updated, rest unchanged 544 }, { 545 Key: []byte("row1-key2"), 546 Value: []byte("row1-key2-value1"), 547 }, 548 } 549 550 row2Unchanged := []MapPair{ 551 { 552 Key: []byte("row2-key1"), 553 Value: []byte("row2-key1-value1"), 554 }, { 555 Key: []byte("row2-key2"), 556 Value: []byte("row2-key2-value1"), 557 }, 558 } 559 560 res, err := b.MapList(rowKey1) 561 require.Nil(t, err) 562 assert.Equal(t, row1Updated, res) 563 res, err = b.MapList(rowKey2) 564 require.Nil(t, err) 565 assert.Equal(t, res, row2Unchanged) 566 }) 567 568 t.Run("validate the results prior to recovery", func(t *testing.T) { 569 rowKey1 := []byte("test1-key-1") 570 rowKey2 := []byte("test1-key-2") 571 572 expectedRow1 := []MapPair{ 573 { 574 Key: []byte("row1-key1"), 575 Value: []byte("row1-key1-value2"), 576 }, { 577 Key: []byte("row1-key2"), 578 Value: []byte("row1-key2-value1"), 579 }, 580 } 581 582 expectedRow2 := []MapPair{ 583 { 584 Key: []byte("row2-key1"), 585 Value: []byte("row2-key1-value1"), 586 }, { 587 Key: []byte("row2-key2"), 588 Value: []byte("row2-key2-value1"), 589 }, 590 } 591 592 res, err := b.MapList(rowKey1) 593 require.Nil(t, err) 594 assert.Equal(t, expectedRow1, res) 595 res, err = b.MapList(rowKey2) 596 require.Nil(t, err) 597 assert.Equal(t, expectedRow2, res) 598 }) 599 600 t.Run("make sure the WAL is flushed", func(t *testing.T) { 601 require.Nil(t, b.WriteWAL()) 602 }) 603 604 t.Run("copy state into recovery folder and destroy original", func(t *testing.T) { 605 cmd := exec.Command("/bin/bash", "-c", fmt.Sprintf("cp -r %s/*.wal %s", 606 dirNameOriginal, dirNameRecovered)) 607 var out bytes.Buffer 608 cmd.Stderr = &out 609 err := cmd.Run() 610 if err != nil { 611 fmt.Println(out.String()) 612 t.Fatal(err) 613 } 614 b = nil 615 require.Nil(t, os.RemoveAll(dirNameOriginal)) 616 }) 617 618 var bRec *Bucket 619 620 t.Run("create new bucket from existing state", func(t *testing.T) { 621 b, err := NewBucket(testCtx(), dirNameRecovered, "", nullLogger(), nil, 622 cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 623 WithStrategy(StrategyMapCollection)) 624 require.Nil(t, err) 625 626 // so big it effectively never triggers as part of this test 627 b.SetMemtableThreshold(1e9) 628 629 bRec = b 630 }) 631 632 t.Run("validate the results after recovery", func(t *testing.T) { 633 rowKey1 := []byte("test1-key-1") 634 rowKey2 := []byte("test1-key-2") 635 636 expectedRow1 := []MapPair{ 637 { 638 Key: []byte("row1-key1"), 639 Value: []byte("row1-key1-value2"), 640 }, { 641 Key: []byte("row1-key2"), 642 Value: []byte("row1-key2-value1"), 643 }, 644 } 645 646 expectedRow2 := []MapPair{ 647 { 648 Key: []byte("row2-key1"), 649 Value: []byte("row2-key1-value1"), 650 }, { 651 Key: []byte("row2-key2"), 652 Value: []byte("row2-key2-value1"), 653 }, 654 } 655 656 res, err := bRec.MapList(rowKey1) 657 require.Nil(t, err) 658 assert.Equal(t, expectedRow1, res) 659 res, err = bRec.MapList(rowKey2) 660 require.Nil(t, err) 661 assert.Equal(t, expectedRow2, res) 662 }) 663 }) 664 }