github.com/koko1123/flow-go-1@v0.29.6/ledger/complete/wal/checkpointer_test.go (about) 1 package wal_test 2 3 import ( 4 "bytes" 5 "fmt" 6 "math" 7 "math/rand" 8 "os" 9 "path" 10 "testing" 11 "time" 12 13 "github.com/rs/zerolog" 14 "github.com/stretchr/testify/assert" 15 "github.com/stretchr/testify/require" 16 "go.uber.org/atomic" 17 18 "github.com/koko1123/flow-go-1/ledger" 19 "github.com/koko1123/flow-go-1/ledger/common/pathfinder" 20 "github.com/koko1123/flow-go-1/ledger/common/testutils" 21 "github.com/koko1123/flow-go-1/ledger/complete" 22 "github.com/koko1123/flow-go-1/ledger/complete/mtrie" 23 "github.com/koko1123/flow-go-1/ledger/complete/mtrie/trie" 24 realWAL "github.com/koko1123/flow-go-1/ledger/complete/wal" 25 "github.com/koko1123/flow-go-1/ledger/complete/wal/fixtures" 26 "github.com/koko1123/flow-go-1/module/metrics" 27 "github.com/koko1123/flow-go-1/utils/unittest" 28 ) 29 30 const ( 31 numInsPerStep = 2 32 keyNumberOfParts = 10 33 keyPartMinByteSize = 1 34 keyPartMaxByteSize = 100 35 valueMaxByteSize = 2 << 16 //16kB 36 size = 10 37 segmentSize = 32 * 1024 38 pathByteSize = 32 39 pathFinderVersion = uint8(complete.DefaultPathFinderVersion) 40 ) 41 42 var ( 43 logger = zerolog.Logger{} 44 metricsCollector = &metrics.NoopCollector{} 45 ) 46 47 func Test_WAL(t *testing.T) { 48 49 unittest.RunWithTempDir(t, func(dir string) { 50 51 const ( 52 checkpointDistance = math.MaxInt // A large number to prevent checkpoint creation. 53 checkpointsToKeep = 1 54 ) 55 56 diskWal, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metricsCollector, dir, size, pathfinder.PathByteSize, realWAL.SegmentSize) 57 require.NoError(t, err) 58 59 led, err := complete.NewLedger(diskWal, size*10, metricsCollector, logger, complete.DefaultPathFinderVersion) 60 require.NoError(t, err) 61 62 compactor, err := complete.NewCompactor(led, diskWal, unittest.Logger(), size, checkpointDistance, checkpointsToKeep, atomic.NewBool(false)) 63 require.NoError(t, err) 64 65 <-compactor.Ready() 66 67 var state = led.InitialState() 68 69 //saved data after updates 70 savedData := make(map[string]map[string]ledger.Value) 71 72 // WAL segments are 32kB, so here we generate 2 keys 16kB each, times `size` 73 // so we should get at least `size` segments 74 75 for i := 0; i < size; i++ { 76 77 keys := testutils.RandomUniqueKeys(numInsPerStep, keyNumberOfParts, keyPartMinByteSize, keyPartMaxByteSize) 78 values := testutils.RandomValues(numInsPerStep, valueMaxByteSize/2, valueMaxByteSize) 79 update, err := ledger.NewUpdate(state, keys, values) 80 require.NoError(t, err) 81 state, _, err = led.Set(update) 82 require.NoError(t, err) 83 84 data := make(map[string]ledger.Value, len(keys)) 85 for j, key := range keys { 86 data[string(ledger.EncodeKey(&key))] = values[j] 87 } 88 89 savedData[string(state[:])] = data 90 } 91 92 <-led.Done() 93 <-compactor.Done() 94 95 diskWal2, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metricsCollector, dir, size, pathfinder.PathByteSize, realWAL.SegmentSize) 96 require.NoError(t, err) 97 led2, err := complete.NewLedger(diskWal2, (size*10)+10, metricsCollector, logger, complete.DefaultPathFinderVersion) 98 require.NoError(t, err) 99 compactor2 := fixtures.NewNoopCompactor(led2) // noop compactor is used because no write is needed. 100 <-compactor2.Ready() 101 102 // random map iteration order is a benefit here 103 for state, data := range savedData { 104 105 keys := make([]ledger.Key, 0, len(data)) 106 for keyString := range data { 107 key, err := ledger.DecodeKey([]byte(keyString)) 108 require.NoError(t, err) 109 keys = append(keys, *key) 110 } 111 112 var ledgerState ledger.State 113 copy(ledgerState[:], state) 114 query, err := ledger.NewQuery(ledgerState, keys) 115 require.NoError(t, err) 116 values, err := led2.Get(query) 117 require.NoError(t, err) 118 119 for i, key := range keys { 120 assert.Equal(t, data[string(ledger.EncodeKey(&key))], values[i]) 121 } 122 } 123 124 <-led2.Done() 125 <-compactor2.Done() 126 }) 127 } 128 129 func Test_Checkpointing(t *testing.T) { 130 131 unittest.RunWithTempDir(t, func(dir string) { 132 133 f, err := mtrie.NewForest(size*10, metricsCollector, nil) 134 require.NoError(t, err) 135 136 var rootHash = f.GetEmptyRootHash() 137 138 //saved data after updates 139 savedData := make(map[ledger.RootHash]map[ledger.Path]*ledger.Payload) 140 141 t.Run("create WAL and initial trie", func(t *testing.T) { 142 143 wal, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize) 144 require.NoError(t, err) 145 146 // WAL segments are 32kB, so here we generate 2 keys 64kB each, times `size` 147 // so we should get at least `size` segments 148 149 // Generate the tree and create WAL 150 for i := 0; i < size; i++ { 151 152 keys := testutils.RandomUniqueKeys(numInsPerStep, keyNumberOfParts, 1600, 1600) 153 values := testutils.RandomValues(numInsPerStep, valueMaxByteSize/2, valueMaxByteSize) 154 update, err := ledger.NewUpdate(ledger.State(rootHash), keys, values) 155 require.NoError(t, err) 156 157 trieUpdate, err := pathfinder.UpdateToTrieUpdate(update, pathFinderVersion) 158 require.NoError(t, err) 159 160 _, _, err = wal.RecordUpdate(trieUpdate) 161 require.NoError(t, err) 162 163 rootHash, err := f.Update(trieUpdate) 164 require.NoError(t, err) 165 166 fmt.Printf("Updated with %x\n", rootHash) 167 168 data := make(map[ledger.Path]*ledger.Payload, len(trieUpdate.Paths)) 169 for j, path := range trieUpdate.Paths { 170 data[path] = trieUpdate.Payloads[j] 171 } 172 173 savedData[rootHash] = data 174 } 175 // some buffer time of the checkpointer to run 176 time.Sleep(1 * time.Second) 177 <-wal.Done() 178 179 require.FileExists(t, path.Join(dir, "00000010")) //make sure we have enough segments saved 180 }) 181 182 // create a new forest and replay WAL 183 f2, err := mtrie.NewForest(size*10, metricsCollector, nil) 184 require.NoError(t, err) 185 186 t.Run("replay WAL and create checkpoint", func(t *testing.T) { 187 188 require.NoFileExists(t, path.Join(dir, "checkpoint.00000010")) 189 190 wal2, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize) 191 require.NoError(t, err) 192 193 err = wal2.Replay( 194 func(tries []*trie.MTrie) error { 195 return fmt.Errorf("I should fail as there should be no checkpoints") 196 }, 197 func(update *ledger.TrieUpdate) error { 198 _, err := f2.Update(update) 199 return err 200 }, 201 func(rootHash ledger.RootHash) error { 202 return fmt.Errorf("I should fail as there should be no deletions") 203 }, 204 ) 205 require.NoError(t, err) 206 207 checkpointer, err := wal2.NewCheckpointer() 208 require.NoError(t, err) 209 210 require.NoFileExists(t, path.Join(dir, "checkpoint.00000010")) 211 212 err = checkpointer.Checkpoint(10) 213 require.NoError(t, err) 214 215 require.FileExists(t, path.Join(dir, "checkpoint.00000010")) //make sure we have checkpoint file 216 217 <-wal2.Done() 218 }) 219 220 f3, err := mtrie.NewForest(size*10, metricsCollector, nil) 221 require.NoError(t, err) 222 223 t.Run("read checkpoint", func(t *testing.T) { 224 wal3, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize) 225 require.NoError(t, err) 226 227 err = wal3.Replay( 228 func(tries []*trie.MTrie) error { 229 return f3.AddTries(tries) 230 }, 231 func(update *ledger.TrieUpdate) error { 232 return fmt.Errorf("I should fail as there should be no updates") 233 }, 234 func(rootHash ledger.RootHash) error { 235 return fmt.Errorf("I should fail as there should be no deletions") 236 }, 237 ) 238 require.NoError(t, err) 239 240 <-wal3.Done() 241 }) 242 243 t.Run("all forests contain the same data", func(t *testing.T) { 244 // random map iteration order is a benefit here 245 // make sure the tries has been rebuilt from WAL and another from from Checkpoint 246 // f1, f2 and f3 should be identical 247 for rootHash, data := range savedData { 248 249 paths := make([]ledger.Path, 0, len(data)) 250 for path := range data { 251 paths = append(paths, path) 252 } 253 254 values1, err := f.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths}) 255 require.NoError(t, err) 256 257 values2, err := f2.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths}) 258 require.NoError(t, err) 259 260 values3, err := f3.Read(&ledger.TrieRead{RootHash: rootHash, Paths: paths}) 261 require.NoError(t, err) 262 263 for i, path := range paths { 264 require.Equal(t, data[path].Value(), values1[i]) 265 require.Equal(t, data[path].Value(), values2[i]) 266 require.Equal(t, data[path].Value(), values3[i]) 267 } 268 } 269 }) 270 271 keys2 := testutils.RandomUniqueKeys(numInsPerStep, keyNumberOfParts, keyPartMinByteSize, keyPartMaxByteSize) 272 values2 := testutils.RandomValues(numInsPerStep, 1, valueMaxByteSize) 273 t.Run("create segment after checkpoint", func(t *testing.T) { 274 275 //require.NoFileExists(t, path.Join(dir, "00000011")) 276 277 unittest.RequireFileEmpty(t, path.Join(dir, "00000011")) 278 279 //generate one more segment 280 wal4, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize) 281 require.NoError(t, err) 282 283 update, err := ledger.NewUpdate(ledger.State(rootHash), keys2, values2) 284 require.NoError(t, err) 285 286 trieUpdate, err := pathfinder.UpdateToTrieUpdate(update, pathFinderVersion) 287 require.NoError(t, err) 288 289 _, _, err = wal4.RecordUpdate(trieUpdate) 290 require.NoError(t, err) 291 292 rootHash, err = f.Update(trieUpdate) 293 require.NoError(t, err) 294 295 <-wal4.Done() 296 297 require.FileExists(t, path.Join(dir, "00000011")) //make sure we have extra segment 298 }) 299 300 f5, err := mtrie.NewForest(size*10, metricsCollector, nil) 301 require.NoError(t, err) 302 303 t.Run("replay both checkpoint and updates after checkpoint", func(t *testing.T) { 304 wal5, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize) 305 require.NoError(t, err) 306 307 updatesLeft := 1 // there should be only one update 308 309 err = wal5.Replay( 310 func(tries []*trie.MTrie) error { 311 return f5.AddTries(tries) 312 }, 313 func(update *ledger.TrieUpdate) error { 314 if updatesLeft == 0 { 315 return fmt.Errorf("more updates called then expected") 316 } 317 _, err := f5.Update(update) 318 updatesLeft-- 319 return err 320 }, 321 func(rootHash ledger.RootHash) error { 322 return fmt.Errorf("I should fail as there should be no deletions") 323 }, 324 ) 325 require.NoError(t, err) 326 327 <-wal5.Done() 328 }) 329 330 t.Run("extra updates were applied correctly", func(t *testing.T) { 331 332 query, err := ledger.NewQuery(ledger.State(rootHash), keys2) 333 require.NoError(t, err) 334 trieRead, err := pathfinder.QueryToTrieRead(query, pathFinderVersion) 335 require.NoError(t, err) 336 337 values, err := f.Read(trieRead) 338 require.NoError(t, err) 339 340 values5, err := f5.Read(trieRead) 341 require.NoError(t, err) 342 343 for i := range keys2 { 344 require.Equal(t, values2[i], values[i]) 345 require.Equal(t, values2[i], values5[i]) 346 } 347 }) 348 349 t.Run("corrupted checkpoints are skipped", func(t *testing.T) { 350 351 f6, err := mtrie.NewForest(size*10, metricsCollector, nil) 352 require.NoError(t, err) 353 354 wal6, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize) 355 require.NoError(t, err) 356 357 // make sure no earlier checkpoints exist 358 require.NoFileExists(t, path.Join(dir, "checkpoint.0000008")) 359 require.NoFileExists(t, path.Join(dir, "checkpoint.0000006")) 360 require.NoFileExists(t, path.Join(dir, "checkpoint.0000004")) 361 362 require.FileExists(t, path.Join(dir, "checkpoint.00000010")) 363 364 // create missing checkpoints 365 checkpointer, err := wal6.NewCheckpointer() 366 require.NoError(t, err) 367 368 err = checkpointer.Checkpoint(4) 369 require.NoError(t, err) 370 require.FileExists(t, path.Join(dir, "checkpoint.00000004")) 371 372 err = checkpointer.Checkpoint(6) 373 require.NoError(t, err) 374 require.FileExists(t, path.Join(dir, "checkpoint.00000006")) 375 376 err = checkpointer.Checkpoint(8) 377 require.NoError(t, err) 378 require.FileExists(t, path.Join(dir, "checkpoint.00000008")) 379 380 // corrupt checkpoints 381 randomlyModifyFile(t, path.Join(dir, "checkpoint.00000006")) 382 randomlyModifyFile(t, path.Join(dir, "checkpoint.00000008")) 383 randomlyModifyFile(t, path.Join(dir, "checkpoint.00000010")) 384 385 // make sure 10 is latest checkpoint 386 latestCheckpoint, err := checkpointer.LatestCheckpoint() 387 require.NoError(t, err) 388 require.Equal(t, 10, latestCheckpoint) 389 390 // at this stage, number 4 should be the latest valid checkpoint 391 // check other fail to load 392 393 _, err = checkpointer.LoadCheckpoint(10) 394 require.Error(t, err) 395 _, err = checkpointer.LoadCheckpoint(8) 396 require.Error(t, err) 397 _, err = checkpointer.LoadCheckpoint(6) 398 require.Error(t, err) 399 _, err = checkpointer.LoadCheckpoint(4) 400 require.NoError(t, err) 401 402 err = wal6.ReplayOnForest(f6) 403 require.NoError(t, err) 404 405 <-wal6.Done() 406 407 // check if the latest data is still there 408 query, err := ledger.NewQuery(ledger.State(rootHash), keys2) 409 require.NoError(t, err) 410 trieRead, err := pathfinder.QueryToTrieRead(query, pathFinderVersion) 411 require.NoError(t, err) 412 413 values, err := f.Read(trieRead) 414 require.NoError(t, err) 415 416 values6, err := f6.Read(trieRead) 417 require.NoError(t, err) 418 419 for i := range keys2 { 420 require.Equal(t, values2[i], values[i]) 421 require.Equal(t, values2[i], values6[i]) 422 } 423 424 }) 425 426 }) 427 } 428 429 // func TestCheckpointFileError(t *testing.T) { 430 // 431 // unittest.RunWithTempDir(t, func(dir string) { 432 // 433 // wal, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize) 434 // require.NoError(t, err) 435 // 436 // // create WAL 437 // 438 // keys := testutils.RandomUniqueKeys(numInsPerStep, keyNumberOfParts, 1600, 1600) 439 // values := testutils.RandomValues(numInsPerStep, valueMaxByteSize/2, valueMaxByteSize) 440 // update, err := ledger.NewUpdate(ledger.State(trie.EmptyTrieRootHash()), keys, values) 441 // require.NoError(t, err) 442 // 443 // trieUpdate, err := pathfinder.UpdateToTrieUpdate(update, pathFinderVersion) 444 // require.NoError(t, err) 445 // 446 // _, _, err = wal.RecordUpdate(trieUpdate) 447 // require.NoError(t, err) 448 // 449 // // some buffer time of the checkpointer to run 450 // time.Sleep(1 * time.Second) 451 // <-wal.Done() 452 // 453 // require.FileExists(t, path.Join(dir, "00000001")) //make sure WAL segment is saved 454 // 455 // wal2, err := realWAL.NewDiskWAL(unittest.Logger(), nil, metrics.NewNoopCollector(), dir, size*10, pathByteSize, segmentSize) 456 // require.NoError(t, err) 457 // 458 // checkpointer, err := wal2.NewCheckpointer() 459 // require.NoError(t, err) 460 // 461 // t.Run("write error", func(t *testing.T) { 462 // errWrite := errors.New("unexpected write error") 463 // 464 // err = checkpointer.Checkpoint(1, func() (io.WriteCloser, error) { 465 // return newWriteCloserWithErrors(errWrite, nil), nil 466 // }) 467 // require.ErrorIs(t, err, errWrite) 468 // }) 469 // 470 // t.Run("close error", func(t *testing.T) { 471 // errClose := errors.New("unexpected close error") 472 // 473 // err = checkpointer.Checkpoint(1, func() (io.WriteCloser, error) { 474 // return newWriteCloserWithErrors(nil, errClose), nil 475 // }) 476 // require.ErrorIs(t, err, errClose) 477 // }) 478 // }) 479 // } 480 481 // randomlyModifyFile picks random byte and modifies it 482 // this should be enough to cause checkpoint loading to fail 483 // as it contains checksum 484 func randomlyModifyFile(t *testing.T, filename string) { 485 486 file, err := os.OpenFile(filename, os.O_RDWR, 0644) 487 require.NoError(t, err) 488 489 fileInfo, err := file.Stat() 490 require.NoError(t, err) 491 492 fileSize := fileInfo.Size() 493 494 buf := make([]byte, 1) 495 496 // get some random offset 497 offset := int64(rand.Int()) % (fileSize + int64(len(buf))) 498 499 _, err = file.ReadAt(buf, offset) 500 require.NoError(t, err) 501 502 // byte addition will simply wrap around 503 buf[0]++ 504 505 _, err = file.WriteAt(buf, offset) 506 require.NoError(t, err) 507 } 508 509 func Test_StoringLoadingCheckpoints(t *testing.T) { 510 511 unittest.RunWithTempDir(t, func(dir string) { 512 // some hash will be literally encoded in output file 513 // so we can find it and modify - to make sure we get a different checksum 514 // but not fail process by, for example, modifying saved data length causing EOF 515 516 emptyTrie := trie.NewEmptyMTrie() 517 518 p1 := testutils.PathByUint8(0) 519 v1 := testutils.LightPayload8('A', 'a') 520 521 p2 := testutils.PathByUint8(1) 522 v2 := testutils.LightPayload8('B', 'b') 523 524 paths := []ledger.Path{p1, p2} 525 payloads := []ledger.Payload{*v1, *v2} 526 527 updatedTrie, _, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true) 528 require.NoError(t, err) 529 530 someHash := updatedTrie.RootNode().LeftChild().Hash() // Hash of left child 531 532 fullpath := path.Join(dir, "temp-checkpoint") 533 534 err = realWAL.StoreCheckpointV5(dir, "temp-checkpoint", &logger, updatedTrie) 535 require.NoError(t, err) 536 537 t.Run("works without data modification", func(t *testing.T) { 538 logger := unittest.Logger() 539 tries, err := realWAL.LoadCheckpoint(fullpath, &logger) 540 require.NoError(t, err) 541 require.Equal(t, 1, len(tries)) 542 require.Equal(t, updatedTrie, tries[0]) 543 }) 544 545 t.Run("detects modified data", func(t *testing.T) { 546 b, err := os.ReadFile(fullpath) 547 require.NoError(t, err) 548 549 index := bytes.Index(b, someHash[:]) 550 require.NotEqual(t, -1, index) 551 b[index] = 23 552 553 err = os.WriteFile(fullpath, b, 0644) 554 require.NoError(t, err) 555 556 logger := unittest.Logger() 557 tries, err := realWAL.LoadCheckpoint(fullpath, &logger) 558 require.Error(t, err) 559 require.Nil(t, tries) 560 require.Contains(t, err.Error(), "checksum") 561 }) 562 }) 563 } 564 565 type writeCloserWithErrors struct { 566 writeError error 567 closeError error 568 } 569 570 func newWriteCloserWithErrors(writeError error, closeError error) *writeCloserWithErrors { 571 return &writeCloserWithErrors{ 572 writeError: writeError, 573 closeError: closeError, 574 } 575 } 576 577 func (wc *writeCloserWithErrors) Write(p []byte) (n int, err error) { 578 return 0, wc.writeError 579 } 580 581 func (wc *writeCloserWithErrors) Close() error { 582 return wc.closeError 583 }