github.com/koko1123/flow-go-1@v0.29.6/ledger/complete/ledger.go (about) 1 package complete 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io" 7 "os" 8 "time" 9 10 "github.com/rs/zerolog" 11 12 "github.com/koko1123/flow-go-1/ledger" 13 "github.com/koko1123/flow-go-1/ledger/common/hash" 14 "github.com/koko1123/flow-go-1/ledger/common/pathfinder" 15 "github.com/koko1123/flow-go-1/ledger/complete/mtrie" 16 "github.com/koko1123/flow-go-1/ledger/complete/mtrie/trie" 17 realWAL "github.com/koko1123/flow-go-1/ledger/complete/wal" 18 "github.com/koko1123/flow-go-1/module" 19 ) 20 21 const DefaultCacheSize = 1000 22 const DefaultPathFinderVersion = 1 23 const defaultTrieUpdateChanSize = 500 24 25 // Ledger (complete) is a fast memory-efficient fork-aware thread-safe trie-based key/value storage. 26 // Ledger holds an array of registers (key-value pairs) and keeps tracks of changes over a limited time. 27 // Each register is referenced by an ID (key) and holds a value (byte slice). 28 // Ledger provides atomic batched updates and read (with or without proofs) operation given a list of keys. 29 // Every update to the Ledger creates a new state which captures the state of the storage. 30 // Under the hood, it uses binary Merkle tries to generate inclusion and non-inclusion proofs. 31 // Ledger is fork-aware which means any update can be applied at any previous state which forms a tree of tries (forest). 32 // The forest is in memory but all changes (e.g. register updates) are captured inside write-ahead-logs for crash recovery reasons. 33 // In order to limit the memory usage and maintain the performance storage only keeps a limited number of 34 // tries and purge the old ones (FIFO-based); in other words, Ledger is not designed to be used 35 // for archival usage but make it possible for other software components to reconstruct very old tries using write-ahead logs. 36 type Ledger struct { 37 forest *mtrie.Forest 38 wal realWAL.LedgerWAL 39 metrics module.LedgerMetrics 40 logger zerolog.Logger 41 trieUpdateCh chan *WALTrieUpdate 42 pathFinderVersion uint8 43 } 44 45 // NewLedger creates a new in-memory trie-backed ledger storage with persistence. 46 func NewLedger( 47 wal realWAL.LedgerWAL, 48 capacity int, 49 metrics module.LedgerMetrics, 50 log zerolog.Logger, 51 pathFinderVer uint8) (*Ledger, error) { 52 53 logger := log.With().Str("ledger_mod", "complete").Logger() 54 55 forest, err := mtrie.NewForest(capacity, metrics, nil) 56 if err != nil { 57 return nil, fmt.Errorf("cannot create forest: %w", err) 58 } 59 60 storage := &Ledger{ 61 forest: forest, 62 wal: wal, 63 metrics: metrics, 64 logger: logger, 65 pathFinderVersion: pathFinderVer, 66 trieUpdateCh: make(chan *WALTrieUpdate, defaultTrieUpdateChanSize), 67 } 68 69 // pause records to prevent double logging trie removals 70 wal.PauseRecord() 71 defer wal.UnpauseRecord() 72 73 err = wal.ReplayOnForest(forest) 74 if err != nil { 75 return nil, fmt.Errorf("cannot restore LedgerWAL: %w", err) 76 } 77 78 wal.UnpauseRecord() 79 80 // TODO update to proper value once https://github.com/koko1123/flow-go-1/pull/3720 is merged 81 metrics.ForestApproxMemorySize(0) 82 83 return storage, nil 84 } 85 86 // TrieUpdateChan returns a channel which is used to receive trie updates that needs to be logged in WALs. 87 // This channel is closed when ledger component shutdowns down. 88 func (l *Ledger) TrieUpdateChan() <-chan *WALTrieUpdate { 89 return l.trieUpdateCh 90 } 91 92 // Ready implements interface module.ReadyDoneAware 93 // it starts the EventLoop's internal processing loop. 94 func (l *Ledger) Ready() <-chan struct{} { 95 ready := make(chan struct{}) 96 go func() { 97 defer close(ready) 98 // Start WAL component. 99 <-l.wal.Ready() 100 }() 101 return ready 102 } 103 104 // Done implements interface module.ReadyDoneAware 105 func (l *Ledger) Done() <-chan struct{} { 106 done := make(chan struct{}) 107 go func() { 108 defer close(done) 109 110 // Ledger is responsible for closing trieUpdateCh channel, 111 // so Compactor can drain and process remaining updates. 112 close(l.trieUpdateCh) 113 }() 114 return done 115 } 116 117 // InitialState returns the state of an empty ledger 118 func (l *Ledger) InitialState() ledger.State { 119 return ledger.State(l.forest.GetEmptyRootHash()) 120 } 121 122 // ValueSizes read the values of the given keys at the given state. 123 // It returns value sizes in the same order as given registerIDs and errors (if any) 124 func (l *Ledger) ValueSizes(query *ledger.Query) (valueSizes []int, err error) { 125 start := time.Now() 126 paths, err := pathfinder.KeysToPaths(query.Keys(), l.pathFinderVersion) 127 if err != nil { 128 return nil, err 129 } 130 trieRead := &ledger.TrieRead{RootHash: ledger.RootHash(query.State()), Paths: paths} 131 valueSizes, err = l.forest.ValueSizes(trieRead) 132 if err != nil { 133 return nil, err 134 } 135 136 l.metrics.ReadValuesNumber(uint64(len(paths))) 137 readDuration := time.Since(start) 138 l.metrics.ReadDuration(readDuration) 139 140 if len(paths) > 0 { 141 durationPerValue := time.Duration(readDuration.Nanoseconds()/int64(len(paths))) * time.Nanosecond 142 l.metrics.ReadDurationPerItem(durationPerValue) 143 } 144 145 return valueSizes, err 146 } 147 148 // GetSingleValue reads value of a single given key at the given state. 149 func (l *Ledger) GetSingleValue(query *ledger.QuerySingleValue) (value ledger.Value, err error) { 150 start := time.Now() 151 path, err := pathfinder.KeyToPath(query.Key(), l.pathFinderVersion) 152 if err != nil { 153 return nil, err 154 } 155 trieRead := &ledger.TrieReadSingleValue{RootHash: ledger.RootHash(query.State()), Path: path} 156 value, err = l.forest.ReadSingleValue(trieRead) 157 if err != nil { 158 return nil, err 159 } 160 161 l.metrics.ReadValuesNumber(1) 162 readDuration := time.Since(start) 163 l.metrics.ReadDuration(readDuration) 164 165 durationPerValue := time.Duration(readDuration.Nanoseconds()) * time.Nanosecond 166 l.metrics.ReadDurationPerItem(durationPerValue) 167 168 return value, nil 169 } 170 171 // Get read the values of the given keys at the given state 172 // it returns the values in the same order as given registerIDs and errors (if any) 173 func (l *Ledger) Get(query *ledger.Query) (values []ledger.Value, err error) { 174 start := time.Now() 175 paths, err := pathfinder.KeysToPaths(query.Keys(), l.pathFinderVersion) 176 if err != nil { 177 return nil, err 178 } 179 trieRead := &ledger.TrieRead{RootHash: ledger.RootHash(query.State()), Paths: paths} 180 values, err = l.forest.Read(trieRead) 181 if err != nil { 182 return nil, err 183 } 184 185 l.metrics.ReadValuesNumber(uint64(len(paths))) 186 readDuration := time.Since(start) 187 l.metrics.ReadDuration(readDuration) 188 189 if len(paths) > 0 { 190 durationPerValue := time.Duration(readDuration.Nanoseconds()/int64(len(paths))) * time.Nanosecond 191 l.metrics.ReadDurationPerItem(durationPerValue) 192 } 193 194 return values, err 195 } 196 197 // Set updates the ledger given an update. 198 // It returns the state after update and errors (if any) 199 func (l *Ledger) Set(update *ledger.Update) (newState ledger.State, trieUpdate *ledger.TrieUpdate, err error) { 200 start := time.Now() 201 202 // TODO: add test case 203 if update.Size() == 0 { 204 // return current state root unchanged 205 return update.State(), nil, nil 206 } 207 208 trieUpdate, err = pathfinder.UpdateToTrieUpdate(update, l.pathFinderVersion) 209 if err != nil { 210 return ledger.State(hash.DummyHash), nil, err 211 } 212 213 l.metrics.UpdateCount() 214 215 newState, err = l.set(trieUpdate) 216 if err != nil { 217 return ledger.State(hash.DummyHash), nil, err 218 } 219 220 // TODO update to proper value once https://github.com/koko1123/flow-go-1/pull/3720 is merged 221 l.metrics.ForestApproxMemorySize(0) 222 223 elapsed := time.Since(start) 224 l.metrics.UpdateDuration(elapsed) 225 226 if len(trieUpdate.Paths) > 0 { 227 durationPerValue := time.Duration(elapsed.Nanoseconds() / int64(len(trieUpdate.Paths))) 228 l.metrics.UpdateDurationPerItem(durationPerValue) 229 } 230 231 state := update.State() 232 l.logger.Info().Hex("from", state[:]). 233 Hex("to", newState[:]). 234 Int("update_size", update.Size()). 235 Msg("ledger updated") 236 return newState, trieUpdate, nil 237 } 238 239 func (l *Ledger) set(trieUpdate *ledger.TrieUpdate) (newState ledger.State, err error) { 240 241 // resultCh is a buffered channel to receive WAL update result. 242 resultCh := make(chan error, 1) 243 244 // trieCh is a buffered channel to send updated trie. 245 // trieCh can be closed without sending updated trie to indicate failure to update trie. 246 trieCh := make(chan *trie.MTrie, 1) 247 defer close(trieCh) 248 249 // There are two goroutines: 250 // 1. writing the trie update to WAL (in Compactor goroutine) 251 // 2. creating a new trie from the trie update (in this goroutine) 252 // Since writing to WAL is running concurrently, we use resultCh 253 // to receive WAL update result from Compactor. 254 // Compactor also needs new trie created here because Compactor 255 // caches new trie to minimize memory foot-print while checkpointing. 256 // `trieCh` is used to send created trie to Compactor. 257 l.trieUpdateCh <- &WALTrieUpdate{Update: trieUpdate, ResultCh: resultCh, TrieCh: trieCh} 258 259 newTrie, err := l.forest.NewTrie(trieUpdate) 260 walError := <-resultCh 261 262 if err != nil { 263 return ledger.State(hash.DummyHash), fmt.Errorf("cannot update state: %w", err) 264 } 265 if walError != nil { 266 return ledger.State(hash.DummyHash), fmt.Errorf("error while writing LedgerWAL: %w", walError) 267 } 268 269 err = l.forest.AddTrie(newTrie) 270 if err != nil { 271 return ledger.State(hash.DummyHash), fmt.Errorf("failed to add new trie to forest: %w", err) 272 } 273 274 trieCh <- newTrie 275 276 return ledger.State(newTrie.RootHash()), nil 277 } 278 279 // Prove provides proofs for a ledger query and errors (if any). 280 // 281 // Proves are generally _not_ provided in the register order of the query. 282 // In the current implementation, proofs are sorted in a deterministic order specified by the 283 // forest and mtrie implementation. 284 func (l *Ledger) Prove(query *ledger.Query) (proof ledger.Proof, err error) { 285 286 paths, err := pathfinder.KeysToPaths(query.Keys(), l.pathFinderVersion) 287 if err != nil { 288 return nil, err 289 } 290 291 trieRead := &ledger.TrieRead{RootHash: ledger.RootHash(query.State()), Paths: paths} 292 batchProof, err := l.forest.Proofs(trieRead) 293 if err != nil { 294 return nil, fmt.Errorf("could not get proofs: %w", err) 295 } 296 297 proofToGo := ledger.EncodeTrieBatchProof(batchProof) 298 299 if len(paths) > 0 { 300 l.metrics.ProofSize(uint32(len(proofToGo) / len(paths))) 301 } 302 303 return proofToGo, err 304 } 305 306 // MemSize return the amount of memory used by ledger 307 // TODO implement an approximate MemSize method 308 func (l *Ledger) MemSize() (int64, error) { 309 return 0, nil 310 } 311 312 // ForestSize returns the number of tries stored in the forest 313 func (l *Ledger) ForestSize() int { 314 return l.forest.Size() 315 } 316 317 // Tries returns the tries stored in the forest 318 func (l *Ledger) Tries() ([]*trie.MTrie, error) { 319 return l.forest.GetTries() 320 } 321 322 // Checkpointer returns a checkpointer instance 323 func (l *Ledger) Checkpointer() (*realWAL.Checkpointer, error) { 324 checkpointer, err := l.wal.NewCheckpointer() 325 if err != nil { 326 return nil, fmt.Errorf("cannot create checkpointer for compactor: %w", err) 327 } 328 return checkpointer, nil 329 } 330 331 // ExportCheckpointAt exports a checkpoint at specific state commitment after applying migrations and returns the new state (after migration) and any errors 332 func (l *Ledger) ExportCheckpointAt( 333 state ledger.State, 334 migrations []ledger.Migration, 335 preCheckpointReporters []ledger.Reporter, 336 postCheckpointReporters []ledger.Reporter, 337 targetPathFinderVersion uint8, 338 outputDir, outputFile string, 339 ) (ledger.State, error) { 340 341 l.logger.Info().Msgf( 342 "Ledger is loaded, checkpoint export has started for state %s, and %d migrations have been planed", 343 state.String(), 344 len(migrations), 345 ) 346 347 // get trie 348 t, err := l.forest.GetTrie(ledger.RootHash(state)) 349 if err != nil { 350 rh, _ := l.forest.MostRecentTouchedRootHash() 351 l.logger.Info(). 352 Str("hash", rh.String()). 353 Msgf("Most recently touched root hash.") 354 return ledger.State(hash.DummyHash), 355 fmt.Errorf("cannot get trie at the given state commitment: %w", err) 356 } 357 358 // clean up tries to release memory 359 err = l.keepOnlyOneTrie(state) 360 if err != nil { 361 return ledger.State(hash.DummyHash), 362 fmt.Errorf("failed to clean up tries to reduce memory usage: %w", err) 363 } 364 365 var payloads []ledger.Payload 366 var newTrie *trie.MTrie 367 368 noMigration := len(migrations) == 0 369 370 if noMigration { 371 // when there is no migration, reuse the trie without rebuilding it 372 newTrie = t 373 // when there is no migration, we don't generate the payloads here until later running the 374 // postCheckpointReporters, because the ExportReporter is currently the only 375 // preCheckpointReporters, which doesn't use the payloads. 376 } else { 377 // get all payloads 378 payloads = t.AllPayloads() 379 payloadSize := len(payloads) 380 381 // migrate payloads 382 for i, migrate := range migrations { 383 l.logger.Info().Msgf("migration %d/%d is underway", i, len(migrations)) 384 385 start := time.Now() 386 payloads, err = migrate(payloads) 387 elapsed := time.Since(start) 388 389 if err != nil { 390 return ledger.State(hash.DummyHash), fmt.Errorf("error applying migration (%d): %w", i, err) 391 } 392 393 newPayloadSize := len(payloads) 394 395 if payloadSize != newPayloadSize { 396 l.logger.Warn(). 397 Int("migration_step", i). 398 Int("expected_size", payloadSize). 399 Int("outcome_size", newPayloadSize). 400 Msg("payload counts has changed during migration, make sure this is expected.") 401 } 402 l.logger.Info().Str("timeTaken", elapsed.String()).Msgf("migration %d is done", i) 403 404 payloadSize = newPayloadSize 405 } 406 407 l.logger.Info().Msgf("creating paths for %v payloads", len(payloads)) 408 409 // get paths 410 paths, err := pathfinder.PathsFromPayloads(payloads, targetPathFinderVersion) 411 if err != nil { 412 return ledger.State(hash.DummyHash), fmt.Errorf("cannot export checkpoint, can't construct paths: %w", err) 413 } 414 415 l.logger.Info().Msgf("constructing a new trie with migrated payloads (count: %d)...", len(payloads)) 416 417 emptyTrie := trie.NewEmptyMTrie() 418 419 // no need to prune the data since it has already been prunned through migrations 420 applyPruning := false 421 newTrie, _, err = trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, applyPruning) 422 if err != nil { 423 return ledger.State(hash.DummyHash), fmt.Errorf("constructing updated trie failed: %w", err) 424 } 425 } 426 427 statecommitment := ledger.State(newTrie.RootHash()) 428 429 l.logger.Info().Msgf("successfully built new trie. NEW ROOT STATECOMMIEMENT: %v", statecommitment.String()) 430 431 l.logger.Info().Msgf("running pre-checkpoint reporters") 432 // run post migration reporters 433 for i, reporter := range preCheckpointReporters { 434 l.logger.Info().Msgf("running a pre-checkpoint generation reporter: %s, (%v/%v)", reporter.Name(), i, len(preCheckpointReporters)) 435 err := runReport(reporter, payloads, statecommitment, l.logger) 436 if err != nil { 437 return ledger.State(hash.DummyHash), err 438 } 439 } 440 441 l.logger.Info().Msgf("finished running pre-checkpoint reporters") 442 443 l.logger.Info().Msg("creating a checkpoint for the new trie, storing the checkpoint to the file") 444 445 err = os.MkdirAll(outputDir, os.ModePerm) 446 if err != nil { 447 return ledger.State(hash.DummyHash), fmt.Errorf("could not create output dir %s: %w", outputDir, err) 448 } 449 450 err = realWAL.StoreCheckpointV6Concurrently([]*trie.MTrie{newTrie}, outputDir, outputFile, &l.logger) 451 452 // Writing the checkpoint takes time to write and copy. 453 // Without relying on an exit code or stdout, we need to know when the copy is complete. 454 writeStatusFileErr := writeStatusFile("checkpoint_status.json", err) 455 if writeStatusFileErr != nil { 456 return ledger.State(hash.DummyHash), fmt.Errorf("failed to write checkpoint status file: %w", writeStatusFileErr) 457 } 458 459 if err != nil { 460 return ledger.State(hash.DummyHash), fmt.Errorf("failed to store the checkpoint: %w", err) 461 } 462 463 l.logger.Info().Msgf("checkpoint file successfully stored at: %v %v", outputDir, outputFile) 464 465 l.logger.Info().Msgf("start running post-checkpoint reporters") 466 467 if noMigration { 468 // when there is no mgiration, we generate the payloads now before 469 // running the postCheckpointReporters 470 payloads = newTrie.AllPayloads() 471 } 472 473 // running post checkpoint reporters 474 for i, reporter := range postCheckpointReporters { 475 l.logger.Info().Msgf("running a post-checkpoint generation reporter: %s, (%v/%v)", reporter.Name(), i, len(postCheckpointReporters)) 476 err := runReport(reporter, payloads, statecommitment, l.logger) 477 if err != nil { 478 return ledger.State(hash.DummyHash), err 479 } 480 } 481 482 l.logger.Info().Msgf("ran all post-checkpoint reporters") 483 484 return statecommitment, nil 485 } 486 487 // MostRecentTouchedState returns a state which is most recently touched. 488 func (l *Ledger) MostRecentTouchedState() (ledger.State, error) { 489 root, err := l.forest.MostRecentTouchedRootHash() 490 return ledger.State(root), err 491 } 492 493 // HasState returns true if the given state exists inside the ledger 494 func (l *Ledger) HasState(state ledger.State) bool { 495 return l.forest.HasTrie(ledger.RootHash(state)) 496 } 497 498 // DumpTrieAsJSON export trie at specific state as JSONL (each line is JSON encoding of a payload) 499 func (l *Ledger) DumpTrieAsJSON(state ledger.State, writer io.Writer) error { 500 fmt.Println(ledger.RootHash(state)) 501 trie, err := l.forest.GetTrie(ledger.RootHash(state)) 502 if err != nil { 503 return fmt.Errorf("cannot find the target trie: %w", err) 504 } 505 return trie.DumpAsJSON(writer) 506 } 507 508 // this operation should only be used for exporting 509 func (l *Ledger) keepOnlyOneTrie(state ledger.State) error { 510 // don't write things to WALs 511 l.wal.PauseRecord() 512 defer l.wal.UnpauseRecord() 513 return l.forest.PurgeCacheExcept(ledger.RootHash(state)) 514 } 515 516 func runReport(r ledger.Reporter, p []ledger.Payload, commit ledger.State, l zerolog.Logger) error { 517 l.Info(). 518 Str("name", r.Name()). 519 Msg("starting reporter") 520 521 start := time.Now() 522 err := r.Report(p, commit) 523 elapsed := time.Since(start) 524 525 l.Info(). 526 Str("timeTaken", elapsed.String()). 527 Str("name", r.Name()). 528 Msg("reporter done") 529 if err != nil { 530 return fmt.Errorf("error running reporter (%s): %w", r.Name(), err) 531 } 532 return nil 533 } 534 535 func writeStatusFile(fileName string, e error) error { 536 checkpointStatus := map[string]bool{"succeeded": e == nil} 537 checkpointStatusJson, _ := json.MarshalIndent(checkpointStatus, "", " ") 538 err := os.WriteFile(fileName, checkpointStatusJson, 0644) 539 return err 540 }