package wal

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"os"
	"path"
	"path/filepath"

	"github.com/rs/zerolog"

	"github.com/onflow/flow-go/ledger"
	"github.com/onflow/flow-go/ledger/complete/mtrie/flattener"
	"github.com/onflow/flow-go/ledger/complete/mtrie/node"
	"github.com/onflow/flow-go/ledger/complete/mtrie/trie"
)

// ErrEOFNotReached for indicating end of file not reached error
var ErrEOFNotReached = errors.New("expect to reach EOF, but actually didn't")

// TODO: validate the header file and the sub file that contains the root hashes
var ReadTriesRootHash = readTriesRootHash
var CheckpointHasRootHash = checkpointHasRootHash

// readCheckpointV6 reads checkpoint file from a main file and 17 file parts.
// the main file stores:
//   - version
//   - checksum of each part file (17 in total)
//   - checksum of the main file itself
//
// the first 16 file parts contain the trie nodes below the subtrieLevel
// the last part file contains the top level trie nodes above the subtrieLevel and all the trie root nodes.
//
// it returns (tries, nil) if there was no error
// it returns (nil, os.ErrNotExist) if a certain file is missing, use (os.IsNotExist to check)
// it returns (nil, ErrEOFNotReached) if a certain part file is malformed
// it returns (nil, err) if running into any exception
func readCheckpointV6(headerFile *os.File, logger zerolog.Logger) ([]*trie.MTrie, error) {
	// the full path of header file
	headerPath := headerFile.Name()
	dir, fileName := filepath.Split(headerPath)

	lg := logger.With().Str("checkpoint_file", headerPath).Logger()
	lg.Info().Msgf("reading v6 checkpoint file")

	// the header holds the expected checksum for every part file; each part is
	// verified against its checksum as it is read below
	subtrieChecksums, topTrieChecksum, err := readCheckpointHeader(headerPath, logger)
	if err != nil {
		return nil, fmt.Errorf("could not read header: %w", err)
	}

	// ensure all checkpoint part file exists, might return os.ErrNotExist error
	// if a file is missing
	err = allPartFileExist(dir, fileName, len(subtrieChecksums))
	if err != nil {
		return nil, fmt.Errorf("fail to check all checkpoint part file exist: %w", err)
	}

	// TODO making number of goroutine configable for reading subtries, which can help us
	// test the code on machines that don't have as much RAM as EN by using fewer goroutines.
	subtrieNodes, err := readSubTriesConcurrently(dir, fileName, subtrieChecksums, lg)
	if err != nil {
		return nil, fmt.Errorf("could not read subtrie from dir: %w", err)
	}

	lg.Info().Uint32("topsum", topTrieChecksum).
		Msg("finish reading all v6 subtrie files, start reading top level tries")

	// the top level part references subtrie nodes by global index, so it must be
	// read after all subtrie node groups are available
	tries, err := readTopLevelTries(dir, fileName, subtrieNodes, topTrieChecksum, lg)
	if err != nil {
		return nil, fmt.Errorf("could not read top level nodes or tries: %w", err)
	}

	lg.Info().Msgf("finish reading all trie roots, trie root count: %v", len(tries))

	if len(tries) > 0 {
		first, last := tries[0], tries[len(tries)-1]
		logger.Info().
			Str("first_hash", first.RootHash().String()).
			Uint64("first_reg_count", first.AllocatedRegCount()).
			Str("last_hash", last.RootHash().String()).
			Uint64("last_reg_count", last.AllocatedRegCount()).
			Int("version", 6).
			Msg("checkpoint tries roots")
	}

	return tries, nil
}

// OpenAndReadCheckpointV6 open the checkpoint file and read it with readCheckpointV6
func OpenAndReadCheckpointV6(dir string, fileName string, logger zerolog.Logger) (
	triesToReturn []*trie.MTrie,
	errToReturn error,
) {

	filepath := filePathCheckpointHeader(dir, fileName)
	errToReturn = withFile(logger, filepath, func(file *os.File) error {
		tries, err := readCheckpointV6(file, logger)
		if err != nil {
			return err
		}
		triesToReturn = tries
		return nil
	})

	return triesToReturn, errToReturn
}

// ReadCheckpointFileSize returns the total size of the checkpoint file
// (header file + all 17 part files summed together)
func ReadCheckpointFileSize(dir string, fileName string) (uint64, error) {
	paths := allFilePaths(dir, fileName)
	totalSize := uint64(0)
	for _, path := range paths {
		fileInfo, err := os.Stat(path)
		if err != nil {
			return 0, fmt.Errorf("could not get file info for %v: %w", path, err)
		}

		totalSize += uint64(fileInfo.Size())
	}

	return totalSize, nil
}

// allFilePaths returns the header file path followed by all part file paths
// (subtrie parts first, top trie part last)
func allFilePaths(dir string, fileName string) []string {
	paths := make([]string, 0, 1+subtrieCount+1)
	paths = append(paths, filePathCheckpointHeader(dir, fileName))
	for i := 0; i < subtrieCount; i++ {
		subTriePath, _, _ := filePathSubTries(dir, fileName, i)
		paths = append(paths, subTriePath)
	}
	topTriePath, _ := filePathTopTries(dir, fileName)
	paths = append(paths, topTriePath)
	return paths
}

// filePathCheckpointHeader returns the full path of the checkpoint header file
func filePathCheckpointHeader(dir string, fileName string) string {
	return path.Join(dir, fileName)
}

// filePathSubTries returns the full path and the file name of the i-th subtrie
// part file; index must be in [0, subtrieCount-1]
func filePathSubTries(dir string, fileName string, index int) (string, string, error) {
	if index < 0 || index > (subtrieCount-1) {
		return "", "", fmt.Errorf("index must be between 0 to %v, but got %v", subtrieCount-1, index)
	}
	subTrieFileName := partFileName(fileName, index)
	return path.Join(dir, subTrieFileName), subTrieFileName, nil
}

// filePathTopTries returns the full path and the file name of the top trie part
// file (the 17th part, at index subtrieCount)
func filePathTopTries(dir string, fileName string) (string, string) {
	topTriesFileName := partFileName(fileName, subtrieCount)
	return path.Join(dir, topTriesFileName), topTriesFileName
}

// partFileName builds a part file name by appending a zero-padded 3-digit
// index suffix, e.g. "checkpoint.000"
func partFileName(fileName string, index int) string {
	return fmt.Sprintf("%v.%03d", fileName, index)
}

// filePathPattern returns a glob pattern matching the header file and all of
// its part files
func filePathPattern(dir string, fileName string) string {
	return fmt.Sprintf("%v*", filePathCheckpointHeader(dir, fileName))
}

// readCheckpointHeader takes a file path and returns subtrieChecksums and topTrieChecksum
// any error returned are exceptions
func readCheckpointHeader(filepath string, logger zerolog.Logger) (
	checksumsOfSubtries []uint32,
	checksumOfTopTrie uint32,
	errToReturn error,
) {
	closable, err := os.Open(filepath)
	if err != nil {
		return nil, 0, fmt.Errorf("could not open header file: %w", err)
	}

	defer func(file *os.File) {
		evictErr := evictFileFromLinuxPageCache(file, false, logger)
		if evictErr != nil {
			logger.Warn().Msgf("failed to evict header file %s from Linux page cache: %s", filepath, evictErr)
			// No need to return this error because it's possible to continue normal operations.
		}
		// merge the close error into the named return without masking an
		// earlier read error
		errToReturn = closeAndMergeError(file, errToReturn)
	}(closable)

	// wrap in a CRC32 reader so every byte read (magic, version, counts,
	// checksums) is folded into the running checksum
	var bufReader io.Reader = bufio.NewReaderSize(closable, defaultBufioReadSize)
	reader := NewCRC32Reader(bufReader)
	// read the magic bytes and check version
	err = validateFileHeader(MagicBytesCheckpointHeader, VersionV6, reader)
	if err != nil {
		return nil, 0, err
	}

	// read the subtrie count
	subtrieCount, err := readSubtrieCount(reader)
	if err != nil {
		return nil, 0, err
	}

	subtrieChecksums := make([]uint32, subtrieCount)
	for i := uint16(0); i < subtrieCount; i++ {
		sum, err := readCRC32Sum(reader)
		if err != nil {
			return nil, 0, fmt.Errorf("could not read %v-th subtrie checksum from checkpoint header: %w", i, err)
		}
		subtrieChecksums[i] = sum
	}

	// read top level trie checksum
	topTrieChecksum, err := readCRC32Sum(reader)
	if err != nil {
		return nil, 0, fmt.Errorf("could not read checkpoint top level trie checksum in chechpoint summary: %w", err)
	}

	// calculate the actual checksum (must be taken before reading the stored
	// checksum, which is not part of the checksummed data)
	actualSum := reader.Crc32()

	// read the stored checksum, and compare with the actual sum
	expectedSum, err := readCRC32Sum(reader)
	if err != nil {
		return nil, 0, fmt.Errorf("could not read checkpoint header checksum: %w", err)
	}

	if actualSum != expectedSum {
		return nil, 0, fmt.Errorf("invalid checksum in checkpoint header, expected %v, actual %v",
			expectedSum, actualSum)
	}

	err = ensureReachedEOF(reader)
	if err != nil {
		return nil, 0, fmt.Errorf("fail to read checkpoint header file: %w", err)
	}

	return subtrieChecksums, topTrieChecksum, nil
}

// allPartFileExist check if all the part files of the checkpoint file exist
// it returns nil if all files exist
// it returns os.ErrNotExist if some file is missing, use (os.IsNotExist to check)
// it returns err if running into any exception
func allPartFileExist(dir string, fileName string, totalSubtrieFiles int) error {
	matched, err := findCheckpointPartFiles(dir, fileName)
	if err != nil {
		return fmt.Errorf("could not check all checkpoint part file exist: %w", err)
	}

	// header + subtrie files + top level file
	if len(matched) != 1+totalSubtrieFiles+1 {
		return fmt.Errorf("some checkpoint part file is missing. found part files %v. err :%w",
			matched, os.ErrNotExist)
	}

	return nil
}

// findCheckpointPartFiles returns a slice of file full paths of the part files for the checkpoint file
// with the given fileName under the given folder.
// - it return the matching part files, note it might not contains all the part files.
// - it return error if running any exception
func findCheckpointPartFiles(dir string, fileName string) ([]string, error) {
	pattern := filePathPattern(dir, fileName)
	matched, err := filepath.Glob(pattern)
	if err != nil {
		return nil, fmt.Errorf("could not find checkpoint files: %w", err)
	}

	// build a lookup with matched
	lookup := make(map[string]struct{})
	for _, match := range matched {
		lookup[match] = struct{}{}
	}

	headerPath := filePathCheckpointHeader(dir, fileName)
	parts := make([]string, 0)
	// check header exists
	_, ok := lookup[headerPath]
	if ok {
		parts = append(parts, headerPath)
		delete(lookup, headerPath)
	}

	// check all subtrie parts
	for i := 0; i < subtrieCount; i++ {
		subtriePath, _, err := filePathSubTries(dir, fileName, i)
		if err != nil {
			return nil, err
		}
		_, ok := lookup[subtriePath]
		if ok {
			parts = append(parts, subtriePath)
			delete(lookup, subtriePath)
		}
	}

	// check top level trie part file
	toplevelPath, _ := filePathTopTries(dir, fileName)

	_, ok = lookup[toplevelPath]
	if ok {
		parts = append(parts, toplevelPath)
		delete(lookup, toplevelPath)
	}

	return parts, nil
}

// jobReadSubtrie describes one subtrie part file to read; the result is
// delivered on the per-job Result channel.
type jobReadSubtrie struct {
	Index    int
	Checksum uint32
	Result   chan<- *resultReadSubTrie
}

// resultReadSubTrie carries the decoded nodes of one subtrie part, or the
// error encountered while reading it.
type resultReadSubTrie struct {
	Nodes []*node.Node
	Err   error
}

// readSubTriesConcurrently reads all subtrie part files in parallel (one
// worker per part) and returns the node groups in part-index order.
func readSubTriesConcurrently(dir string, fileName string, subtrieChecksums []uint32, logger zerolog.Logger) ([][]*node.Node, error) {

	numOfSubTries := len(subtrieChecksums)
	jobs := make(chan jobReadSubtrie, numOfSubTries)
	resultChs := make([]<-chan *resultReadSubTrie, numOfSubTries)

	// push all jobs into the channel
	for i, checksum := range subtrieChecksums {
		resultCh := make(chan *resultReadSubTrie)
		resultChs[i] = resultCh
		jobs <- jobReadSubtrie{
			Index:    i,
			Checksum: checksum,
			Result:   resultCh,
		}
	}
	close(jobs)

	// TODO: make nWorker configable
	nWorker := numOfSubTries // use as many worker as the jobs to read subtries concurrently
	for i := 0; i < nWorker; i++ {
		go func() {
			for job := range jobs {
				nodes, err := readCheckpointSubTrie(dir, fileName, job.Index, job.Checksum, logger)
				job.Result <- &resultReadSubTrie{
					Nodes: nodes,
					Err:   err,
				}
				close(job.Result)
			}
		}()
	}

	// reading job results in the same order as their indices
	// NOTE(review): result channels are unbuffered, and this loop returns on the
	// first error without draining the remaining channels — workers still holding
	// results for later indices would then block on send indefinitely. Confirm
	// whether this leak on the error path is acceptable to callers.
	nodesGroups := make([][]*node.Node, 0, len(resultChs))
	for i, resultCh := range resultChs {
		result := <-resultCh
		if result.Err != nil {
			return nil, fmt.Errorf("fail to read %v-th subtrie, trie: %w", i, result.Err)
		}

		nodesGroups = append(nodesGroups, result.Nodes)
	}

	return nodesGroups, nil
}

// readCheckpointSubTrie reads and decodes all nodes of the index-th subtrie
// part file, verifying its checksum against the value from the header file.
// Returned nodes are ordered by their serialized (1-based) index.
func readCheckpointSubTrie(dir string, fileName string, index int, checksum uint32, logger zerolog.Logger) (
	[]*node.Node,
	error,
) {
	var nodes []*node.Node
	err := processCheckpointSubTrie(dir, fileName, index, checksum, logger,
		func(reader *Crc32Reader, nodesCount uint64) error {
			scratch := make([]byte, 1024*4) // must not be less than 1024

			nodes = make([]*node.Node, nodesCount+1) //+1 for 0 index meaning nil
			logging := logProgress(fmt.Sprintf("reading %v-th sub trie roots", index), int(nodesCount), logger)
			for i := uint64(1); i <= nodesCount; i++ {
				// child lookups may only reference nodes that were already
				// decoded (indices strictly less than i)
				node, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) {
					if nodeIndex >= i {
						return nil, fmt.Errorf("sequence of serialized nodes does not satisfy Descendents-First-Relationship")
					}
					return nodes[nodeIndex], nil
				})
				if err != nil {
					return fmt.Errorf("cannot read node %d: %w", i, err)
				}
				nodes[i] = node
				logging(i)
			}
			return nil
		})

	if err != nil {
		return nil, err
	}

	// since nodes[0] is always `nil`, returning a slice without nodes[0] could simplify the
	// implementation of getNodeByIndex
	// return nodes[1:], nil
	return nodes[1:], nil
}

// processCheckpointSubTrie opens the index-th subtrie part file, validates its
// header and footer checksum, and passes a CRC32-tracking reader positioned at
// the node data to processNode along with the node count.
//
// subtrie file contains:
// 1. checkpoint version
// 2. nodes
// 3. node count
// 4. checksum
func processCheckpointSubTrie(
	dir string,
	fileName string,
	index int,
	checksum uint32,
	logger zerolog.Logger,
	processNode func(*Crc32Reader, uint64) error,
) error {

	filepath, _, err := filePathSubTries(dir, fileName, index)
	if err != nil {
		return err
	}
	return withFile(logger, filepath, func(f *os.File) error {
		// validate the magic bytes and version
		err := validateFileHeader(MagicBytesCheckpointSubtrie, VersionV6, f)
		if err != nil {
			return err
		}

		// read node count and checksum from the end of the file
		nodesCount, expectedSum, err := readSubTriesFooter(f)
		if err != nil {
			return fmt.Errorf("cannot read sub trie node count: %w", err)
		}

		if checksum != expectedSum {
			return fmt.Errorf("mismatch checksum in subtrie file. checksum from checkpoint header %v does not "+
				"match with the checksum in subtrie file %v", checksum, expectedSum)
		}

		// restart from the beginning of the file, make sure Crc32Reader has seen all the bytes
		// in order to compute the correct checksum
		_, err = f.Seek(0, io.SeekStart)
		if err != nil {
			return fmt.Errorf("cannot seek to start of file: %w", err)
		}

		reader := NewCRC32Reader(bufio.NewReaderSize(f, defaultBufioReadSize))

		// read version again for calculating checksum
		_, _, err = readFileHeader(reader)
		if err != nil {
			return fmt.Errorf("could not read version again for subtrie: %w", err)
		}

		// read file part index and verify

		err = processNode(reader, nodesCount)
		if err != nil {
			return err
		}

		scratch := make([]byte, 1024)
		// read footer and discard, since we only care about checksum
		_, err = io.ReadFull(reader, scratch[:encNodeCountSize])
		if err != nil {
			return fmt.Errorf("cannot read footer: %w", err)
		}

		// calculate the actual checksum (everything except the trailing
		// checksum bytes themselves)
		actualSum := reader.Crc32()

		if actualSum != expectedSum {
			return fmt.Errorf("invalid checksum in subtrie checkpoint, expected %v, actual %v",
				expectedSum, actualSum)
		}

		// read the checksum and discard, since we only care about whether ensureReachedEOF
		_, err = io.ReadFull(reader, scratch[:crc32SumSize])
		if err != nil {
			return fmt.Errorf("could not read subtrie file's checksum: %w", err)
		}

		err = ensureReachedEOF(reader)
		if err != nil {
			return fmt.Errorf("fail to read %v-th sutrie file: %w", index, err)
		}

		return nil
	})
}

// readSubTriesFooter seeks to the end of a subtrie part file and returns the
// stored node count and CRC32 checksum. The file offset is left past the
// footer; callers must seek back before re-reading.
func readSubTriesFooter(f *os.File) (uint64, uint32, error) {
	const footerSize = encNodeCountSize // footer doesn't include crc32 sum
	const footerOffset = footerSize + crc32SumSize
	_, err := f.Seek(-footerOffset, io.SeekEnd)
	if err != nil {
		return 0, 0, fmt.Errorf("cannot seek to footer: %w", err)
	}

	footer := make([]byte, footerSize)
	_, err = io.ReadFull(f, footer)
	if err != nil {
		return 0, 0, fmt.Errorf("could not read footer: %w", err)
	}

	nodeCount, err := decodeNodeCount(footer)
	if err != nil {
		return 0, 0, fmt.Errorf("could not decode subtrie node count: %w", err)
	}

	// the subtrie checksum from the checkpoint header file must be same
	// as the checksum included in the subtrie file
	expectedSum, err := readCRC32Sum(f)
	if err != nil {
		return 0, 0, fmt.Errorf("cannot read checksum for sub trie file: %w", err)
	}

	return nodeCount, expectedSum, nil
}

// readTopLevelTries reads the 17th part file, which contains the nodes above
// the subtrie level and the trie root records. Top level nodes may reference
// subtrie nodes by global index, resolved through subtrieNodes.
//
// 17th part file contains:
// 1. checkpoint version
// 2. subtrieNodeCount
// 3. top level nodes
// 4. trie roots
// 5. node count
// 6. trie count
// 7. checksum
func readTopLevelTries(dir string, fileName string, subtrieNodes [][]*node.Node, topTrieChecksum uint32, logger zerolog.Logger) (
	rootTriesToReturn []*trie.MTrie,
	errToReturn error,
) {

	filepath, _ := filePathTopTries(dir, fileName)
	errToReturn = withFile(logger, filepath, func(file *os.File) error {
		// read and validate magic bytes and version
		err := validateFileHeader(MagicBytesCheckpointToptrie, VersionV6, file)
		if err != nil {
			return err
		}

		// read subtrie Node count and validate
		topLevelNodesCount, triesCount, expectedSum, err := readTopTriesFooter(file)
		if err != nil {
			return fmt.Errorf("could not read top tries footer: %w", err)
		}

		if topTrieChecksum != expectedSum {
			return fmt.Errorf("mismatch top trie checksum, header file has %v, toptrie file has %v",
				topTrieChecksum, expectedSum)
		}

		// restart from the beginning of the file, make sure CRC32Reader has seen all the bytes
		// in order to compute the correct checksum
		_, err = file.Seek(0, io.SeekStart)
		if err != nil {
			return fmt.Errorf("could not seek to 0: %w", err)
		}

		reader := NewCRC32Reader(bufio.NewReaderSize(file, defaultBufioReadSize))

		// read version again for calculating checksum
		_, _, err = readFileHeader(reader)
		if err != nil {
			return fmt.Errorf("could not read version for top trie: %w", err)
		}

		// read subtrie count and validate
		buf := make([]byte, encNodeCountSize)
		_, err = io.ReadFull(reader, buf)
		if err != nil {
			return fmt.Errorf("could not read subtrie node count: %w", err)
		}
		readSubtrieNodeCount, err := decodeNodeCount(buf)
		if err != nil {
			return fmt.Errorf("could not decode node count: %w", err)
		}

		totalSubTrieNodeCount := computeTotalSubTrieNodeCount(subtrieNodes)

		if readSubtrieNodeCount != totalSubTrieNodeCount {
			return fmt.Errorf("mismatch subtrie node count, read from disk (%v), but got actual node count (%v)",
				readSubtrieNodeCount, totalSubTrieNodeCount)
		}

		topLevelNodes := make([]*node.Node, topLevelNodesCount+1) //+1 for 0 index meaning nil
		tries := make([]*trie.MTrie, triesCount)

		// Scratch buffer is used as temporary buffer that reader can read into.
		// Raw data in scratch buffer should be copied or converted into desired
		// objects before next Read operation. If the scratch buffer isn't large
		// enough, a new buffer will be allocated. However, 4096 bytes will
		// be large enough to handle almost all payloads and 100% of interim nodes.
		scratch := make([]byte, 1024*4) // must not be less than 1024

		// read the nodes from subtrie level to the root level
		for i := uint64(1); i <= topLevelNodesCount; i++ {
			// a top level node's global index is totalSubTrieNodeCount + i;
			// its children may only reference lower global indices
			node, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) {
				if nodeIndex >= i+uint64(totalSubTrieNodeCount) {
					return nil, fmt.Errorf("sequence of serialized nodes does not satisfy Descendents-First-Relationship")
				}

				return getNodeByIndex(subtrieNodes, totalSubTrieNodeCount, topLevelNodes, nodeIndex)
			})
			if err != nil {
				return fmt.Errorf("cannot read node at index %d: %w", i, err)
			}

			topLevelNodes[i] = node
		}

		// read the trie root nodes
		for i := uint16(0); i < triesCount; i++ {
			trie, err := flattener.ReadTrie(reader, scratch, func(nodeIndex uint64) (*node.Node, error) {
				return getNodeByIndex(subtrieNodes, totalSubTrieNodeCount, topLevelNodes, nodeIndex)
			})

			if err != nil {
				return fmt.Errorf("cannot read root trie at index %d: %w", i, err)
			}
			tries[i] = trie
		}

		// read footer and discard, since we only care about checksum
		_, err = io.ReadFull(reader, scratch[:encNodeCountSize+encTrieCountSize])
		if err != nil {
			return fmt.Errorf("cannot read footer: %w", err)
		}

		actualSum := reader.Crc32()

		if actualSum != expectedSum {
			return fmt.Errorf("invalid checksum in top level trie, expected %v, actual %v",
				expectedSum, actualSum)
		}

		// read the checksum and discard, since we only care about whether ensureReachedEOF
		_, err = io.ReadFull(reader, scratch[:crc32SumSize])
		if err != nil {
			return fmt.Errorf("could not read checksum from top trie file: %w", err)
		}

		err = ensureReachedEOF(reader)
		if err != nil {
			return fmt.Errorf("fail to read top trie file: %w", err)
		}

		rootTriesToReturn = tries
		return nil
	})
	return rootTriesToReturn, errToReturn
}

// readTriesRootHash reads only the trie root hashes from the top trie part
// file, by seeking directly to the serialized trie root records before the
// footer. It does not verify the file checksum.
func readTriesRootHash(logger zerolog.Logger, dir string, fileName string) (
	trieRootsToReturn []ledger.RootHash,
	errToReturn error,
) {

	filepath, _ := filePathTopTries(dir, fileName)
	errToReturn = withFile(logger, filepath, func(file *os.File) error {
		var err error

		// read and validate magic bytes and version
		err = validateFileHeader(MagicBytesCheckpointToptrie, VersionV6, file)
		if err != nil {
			return err
		}

		// read subtrie Node count and validate
		_, triesCount, _, err := readTopTriesFooter(file)
		if err != nil {
			return fmt.Errorf("could not read top tries footer: %w", err)
		}

		// trie root records sit immediately before the footer; compute their
		// start offset from the end of the file
		footerOffset := encNodeCountSize + encTrieCountSize + crc32SumSize
		trieRootOffset := footerOffset + flattener.EncodedTrieSize*int(triesCount)

		_, err = file.Seek(int64(-trieRootOffset), io.SeekEnd)
		if err != nil {
			return fmt.Errorf("could not seek to 0: %w", err)
		}

		reader := bufio.NewReaderSize(file, defaultBufioReadSize)
		trieRoots := make([]ledger.RootHash, 0, triesCount)
		scratch := make([]byte, 1024*4) // must not be less than 1024
		for i := 0; i < int(triesCount); i++ {
			trieRootNode, err := flattener.ReadEncodedTrie(reader, scratch)
			if err != nil {
				return fmt.Errorf("could not read trie root node: %w", err)
			}

			trieRoots = append(trieRoots, ledger.RootHash(trieRootNode.RootHash))
		}

		trieRootsToReturn = trieRoots
		return nil
	})
	return trieRootsToReturn, errToReturn
}

// checkpointHasRootHash check if the given checkpoint file contains the expected root hash
func checkpointHasRootHash(logger zerolog.Logger, bootstrapDir, filename string, expectedRootHash ledger.RootHash) error {
	roots, err := ReadTriesRootHash(logger, bootstrapDir, filename)
	if err != nil {
		return fmt.Errorf("could not read checkpoint root hash: %w", err)
	}

	if len(roots) == 0 {
		return fmt.Errorf("no root hash found in checkpoint file")
	}

	for i, root := range roots {
		if root == expectedRootHash {
			logger.Info().Msgf("found matching checkpoint root hash at index: %v, checkpoint total trie roots: %v",
				i, len(roots))
			// found the expected commit
			return nil
		}
	}

	return fmt.Errorf("could not find expected root hash %v in checkpoint file which contains: %v ", expectedRootHash, roots)
}

// readFileHeader reads the magic bytes and version from the reader and
// returns (magic, version, error)
func readFileHeader(reader io.Reader) (uint16, uint16, error) {
	bytes := make([]byte, encMagicSize+encVersionSize)
	_, err := io.ReadFull(reader, bytes)
	if err != nil {
		return 0, 0, fmt.Errorf("cannot read magic ID and version: %w", err)
	}
	return decodeVersion(bytes)
}

// validateFileHeader reads the file header and verifies that the magic bytes
// and version match the expected values
func validateFileHeader(expectedMagic uint16, expectedVersion uint16, reader io.Reader) error {
	magic, version, err := readFileHeader(reader)
	if err != nil {
		return err
	}

	if magic != expectedMagic {
		return fmt.Errorf("wrong magic bytes, expect %#x, bot got: %#x", expectedMagic, magic)
	}

	if version != expectedVersion {
		return fmt.Errorf("wrong version, expect %v, bot got: %v", expectedVersion, version)
	}

	return nil
}

// readSubtrieCount reads and decodes the subtrie count field
func readSubtrieCount(reader io.Reader) (uint16, error) {
	bytes := make([]byte, encSubtrieCountSize)
	_, err := io.ReadFull(reader, bytes)
	if err != nil {
		return 0, err
	}
	return decodeSubtrieCount(bytes)
}

// readCRC32Sum reads and decodes a CRC32 checksum field
func readCRC32Sum(reader io.Reader) (uint32, error) {
	bytes := make([]byte, crc32SumSize)
	_, err := io.ReadFull(reader, bytes)
	if err != nil {
		return 0, err
	}
	return decodeCRC32Sum(bytes)
}

// readTopTriesFooter seeks to the end of the top trie part file and returns
// the stored (top level node count, trie count, checksum)
func readTopTriesFooter(f *os.File) (uint64, uint16, uint32, error) {
	// footer offset: nodes count (8 bytes) + tries count (2 bytes) + CRC32 sum (4 bytes)
	const footerOffset = encNodeCountSize + encTrieCountSize + crc32SumSize
	const footerSize = encNodeCountSize + encTrieCountSize // footer doesn't include crc32 sum
	// Seek to footer
	_, err := f.Seek(-footerOffset, io.SeekEnd)
	if err != nil {
		return 0, 0, 0, fmt.Errorf("cannot seek to footer: %w", err)
	}
	footer := make([]byte, footerSize)
	_, err = io.ReadFull(f, footer)
	if err != nil {
		return 0, 0, 0, fmt.Errorf("cannot read footer: %w", err)
	}

	nodeCount, trieCount, err := decodeTopLevelNodesAndTriesFooter(footer)
	if err != nil {
		return 0, 0, 0, fmt.Errorf("could not decode top trie footer: %w", err)
	}

	checksum, err := readCRC32Sum(f)
	if err != nil {
		return 0, 0, 0, fmt.Errorf("cannot read checksum for top trie file: %w", err)
	}
	return nodeCount, trieCount, checksum, nil
}

// computeTotalSubTrieNodeCount returns the total number of nodes across all
// subtrie node groups
func computeTotalSubTrieNodeCount(groups [][]*node.Node) uint64 {
	total := 0
	for _, group := range groups {
		total += len(group)
	}
	return uint64(total)
}

// get a node by node index.
// Note: node index start from 1.
// subtries contains subtrie node groups. subtries[i][0] is NOT nil.
// topLevelNodes contains top level nodes. topLevelNodes[0] is nil.
// any error returned are exceptions
func getNodeByIndex(subtrieNodes [][]*node.Node, totalSubTrieNodeCount uint64, topLevelNodes []*node.Node, index uint64) (*node.Node, error) {
	if index == 0 {
		// item at index 0 is for nil
		return nil, nil
	}

	if index > totalSubTrieNodeCount {
		// indices above the subtrie range address top level nodes
		return getTopNodeByIndex(totalSubTrieNodeCount, topLevelNodes, index)
	}

	// walk the node groups, subtracting each group's length until the offset
	// falls inside a group
	offset := index - 1 // index > 0, won't underflow
	for _, subtries := range subtrieNodes {
		if int(offset) < len(subtries) {
			return subtries[offset], nil
		}

		offset -= uint64(len(subtries))
	}

	return nil, fmt.Errorf("could not find node by index %v, totalSubTrieNodeCount %v", index, totalSubTrieNodeCount)
}

// getTopNodeByIndex resolves a global node index that falls in the top level
// range (index > totalSubTrieNodeCount) to its node in topLevelNodes
func getTopNodeByIndex(totalSubTrieNodeCount uint64, topLevelNodes []*node.Node, index uint64) (*node.Node, error) {
	nodePos := index - totalSubTrieNodeCount

	if nodePos >= uint64(len(topLevelNodes)) {
		return nil, fmt.Errorf("can not find node by index %v, nodePos >= len(topLevelNodes) => (%v > %v)",
			index, nodePos, len(topLevelNodes))
	}

	return topLevelNodes[nodePos], nil
}

// ensureReachedEOF checks if the reader has reached end of file
// it returns nil if reached EOF
// it returns ErrEOFNotReached if didn't reach end of file
// any error returned are exception
func ensureReachedEOF(reader io.Reader) error {
	b := make([]byte, 1)
	_, err := reader.Read(b)
	if errors.Is(err, io.EOF) {
		return nil
	}

	if err == nil {
		return ErrEOFNotReached
	}

	return fmt.Errorf("fail to check if reached EOF: %w", err)
}