github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/ledger/complete/wal/checkpoint_v6_reader.go (about) 1 package wal 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "path" 10 "path/filepath" 11 12 "github.com/rs/zerolog" 13 14 "github.com/onflow/flow-go/ledger" 15 "github.com/onflow/flow-go/ledger/complete/mtrie/flattener" 16 "github.com/onflow/flow-go/ledger/complete/mtrie/node" 17 "github.com/onflow/flow-go/ledger/complete/mtrie/trie" 18 ) 19 20 // ErrEOFNotReached for indicating end of file not reached error 21 var ErrEOFNotReached = errors.New("expect to reach EOF, but actually didn't") 22 23 func ReadTriesRootHash(logger zerolog.Logger, dir string, fileName string) ( 24 []ledger.RootHash, 25 error, 26 ) { 27 err := validateCheckpointFile(logger, dir, fileName) 28 if err != nil { 29 return nil, err 30 } 31 return readTriesRootHash(logger, dir, fileName) 32 } 33 34 var CheckpointHasRootHash = checkpointHasRootHash 35 36 // readCheckpointV6 reads checkpoint file from a main file and 17 file parts. 37 // the main file stores: 38 // - version 39 // - checksum of each part file (17 in total) 40 // - checksum of the main file itself 41 // the first 16 files parts contain the trie nodes below the subtrieLevel 42 // the last part file contains the top level trie nodes above the subtrieLevel and all the trie root nodes. 43 // 44 // it returns (tries, nil) if there was no error 45 // it returns (nil, os.ErrNotExist) if a certain file is missing, use (os.IsNotExist to check) 46 // it returns (nil, ErrEOFNotReached) if a certain part file is malformed 47 // it returns (nil, err) if running into any exception 48 func readCheckpointV6(headerFile *os.File, logger zerolog.Logger) ([]*trie.MTrie, error) { 49 // the full path of header file 50 headerPath := headerFile.Name() 51 dir, fileName := filepath.Split(headerPath) 52 53 lg := logger.With().Str("checkpoint_file", headerPath).Logger() 54 lg.Info().Msgf("reading v6 checkpoint file") 55 56 subtrieChecksums, topTrieChecksum, err := readCheckpointHeader(headerPath, logger) 57 if err != nil { 58 return nil, fmt.Errorf("could not read header: %w", err) 59 } 60 61 // ensure all checkpoint part file exists, might return os.ErrNotExist error 62 // if a file is missing 63 err = allPartFileExist(dir, fileName, len(subtrieChecksums)) 64 if err != nil { 65 return nil, fmt.Errorf("fail to check all checkpoint part file exist: %w", err) 66 } 67 68 // TODO making number of goroutine configable for reading subtries, which can help us 69 // test the code on machines that don't have as much RAM as EN by using fewer goroutines. 70 subtrieNodes, err := readSubTriesConcurrently(dir, fileName, subtrieChecksums, lg) 71 if err != nil { 72 return nil, fmt.Errorf("could not read subtrie from dir: %w", err) 73 } 74 75 lg.Info().Uint32("topsum", topTrieChecksum). 76 Msg("finish reading all v6 subtrie files, start reading top level tries") 77 78 tries, err := readTopLevelTries(dir, fileName, subtrieNodes, topTrieChecksum, lg) 79 if err != nil { 80 return nil, fmt.Errorf("could not read top level nodes or tries: %w", err) 81 } 82 83 lg.Info().Msgf("finish reading all trie roots, trie root count: %v", len(tries)) 84 85 if len(tries) > 0 { 86 first, last := tries[0], tries[len(tries)-1] 87 logger.Info(). 88 Str("first_hash", first.RootHash().String()). 89 Uint64("first_reg_count", first.AllocatedRegCount()). 90 Str("last_hash", last.RootHash().String()). 91 Uint64("last_reg_count", last.AllocatedRegCount()). 92 Int("version", 6). 93 Msg("checkpoint tries roots") 94 } 95 96 return tries, nil 97 } 98 99 // OpenAndReadCheckpointV6 open the checkpoint file and read it with readCheckpointV6 100 func OpenAndReadCheckpointV6(dir string, fileName string, logger zerolog.Logger) ( 101 triesToReturn []*trie.MTrie, 102 errToReturn error, 103 ) { 104 105 filepath := filePathCheckpointHeader(dir, fileName) 106 errToReturn = withFile(logger, filepath, func(file *os.File) error { 107 tries, err := readCheckpointV6(file, logger) 108 if err != nil { 109 return err 110 } 111 triesToReturn = tries 112 return nil 113 }) 114 115 return triesToReturn, errToReturn 116 } 117 118 // ReadCheckpointFileSize returns the total size of the checkpoint file 119 func ReadCheckpointFileSize(dir string, fileName string) (uint64, error) { 120 paths := allFilePaths(dir, fileName) 121 totalSize := uint64(0) 122 for _, path := range paths { 123 fileInfo, err := os.Stat(path) 124 if err != nil { 125 return 0, fmt.Errorf("could not get file info for %v: %w", path, err) 126 } 127 128 totalSize += uint64(fileInfo.Size()) 129 } 130 131 return totalSize, nil 132 } 133 134 func allFilePaths(dir string, fileName string) []string { 135 paths := make([]string, 0, 1+subtrieCount+1) 136 paths = append(paths, filePathCheckpointHeader(dir, fileName)) 137 for i := 0; i < subtrieCount; i++ { 138 subTriePath, _, _ := filePathSubTries(dir, fileName, i) 139 paths = append(paths, subTriePath) 140 } 141 topTriePath, _ := filePathTopTries(dir, fileName) 142 paths = append(paths, topTriePath) 143 return paths 144 } 145 146 func filePathCheckpointHeader(dir string, fileName string) string { 147 return path.Join(dir, fileName) 148 } 149 150 func filePathSubTries(dir string, fileName string, index int) (string, string, error) { 151 if index < 0 || index > (subtrieCount-1) { 152 return "", "", fmt.Errorf("index must be between 0 to %v, but got %v", subtrieCount-1, index) 153 } 154 subTrieFileName := partFileName(fileName, index) 155 return path.Join(dir, subTrieFileName), subTrieFileName, nil 156 } 157 158 func filePathTopTries(dir string, fileName string) (string, string) { 159 topTriesFileName := partFileName(fileName, subtrieCount) 160 return path.Join(dir, topTriesFileName), topTriesFileName 161 } 162 163 func partFileName(fileName string, index int) string { 164 return fmt.Sprintf("%v.%03d", fileName, index) 165 } 166 167 func filePathPattern(dir string, fileName string) string { 168 return fmt.Sprintf("%v*", filePathCheckpointHeader(dir, fileName)) 169 } 170 171 // readCheckpointHeader takes a file path and returns subtrieChecksums and topTrieChecksum 172 // any error returned are exceptions 173 func readCheckpointHeader(filepath string, logger zerolog.Logger) ( 174 checksumsOfSubtries []uint32, 175 checksumOfTopTrie uint32, 176 errToReturn error, 177 ) { 178 closable, err := os.Open(filepath) 179 if err != nil { 180 return nil, 0, fmt.Errorf("could not open header file: %w", err) 181 } 182 183 defer func(file *os.File) { 184 evictErr := evictFileFromLinuxPageCache(file, false, logger) 185 if evictErr != nil { 186 logger.Warn().Msgf("failed to evict header file %s from Linux page cache: %s", filepath, evictErr) 187 // No need to return this error because it's possible to continue normal operations. 188 } 189 errToReturn = closeAndMergeError(file, errToReturn) 190 }(closable) 191 192 var bufReader io.Reader = bufio.NewReaderSize(closable, defaultBufioReadSize) 193 reader := NewCRC32Reader(bufReader) 194 // read the magic bytes and check version 195 err = validateFileHeader(MagicBytesCheckpointHeader, VersionV6, reader) 196 if err != nil { 197 return nil, 0, err 198 } 199 200 // read the subtrie count 201 subtrieCount, err := readSubtrieCount(reader) 202 if err != nil { 203 return nil, 0, err 204 } 205 206 subtrieChecksums := make([]uint32, subtrieCount) 207 for i := uint16(0); i < subtrieCount; i++ { 208 sum, err := readCRC32Sum(reader) 209 if err != nil { 210 return nil, 0, fmt.Errorf("could not read %v-th subtrie checksum from checkpoint header: %w", i, err) 211 } 212 subtrieChecksums[i] = sum 213 } 214 215 // read top level trie checksum 216 topTrieChecksum, err := readCRC32Sum(reader) 217 if err != nil { 218 return nil, 0, fmt.Errorf("could not read checkpoint top level trie checksum in chechpoint summary: %w", err) 219 } 220 221 // calculate the actual checksum 222 actualSum := reader.Crc32() 223 224 // read the stored checksum, and compare with the actual sum 225 expectedSum, err := readCRC32Sum(reader) 226 if err != nil { 227 return nil, 0, fmt.Errorf("could not read checkpoint header checksum: %w", err) 228 } 229 230 if actualSum != expectedSum { 231 return nil, 0, fmt.Errorf("invalid checksum in checkpoint header, expected %v, actual %v", 232 expectedSum, actualSum) 233 } 234 235 err = ensureReachedEOF(reader) 236 if err != nil { 237 return nil, 0, fmt.Errorf("fail to read checkpoint header file: %w", err) 238 } 239 240 return subtrieChecksums, topTrieChecksum, nil 241 } 242 243 // allPartFileExist check if all the part files of the checkpoint file exist 244 // it returns nil if all files exist 245 // it returns os.ErrNotExist if some file is missing, use (os.IsNotExist to check) 246 // it returns err if running into any exception 247 func allPartFileExist(dir string, fileName string, totalSubtrieFiles int) error { 248 matched, err := findCheckpointPartFiles(dir, fileName) 249 if err != nil { 250 return fmt.Errorf("could not check all checkpoint part file exist: %w", err) 251 } 252 253 // header + subtrie files + top level file 254 if len(matched) != 1+totalSubtrieFiles+1 { 255 return fmt.Errorf("some checkpoint part file is missing. found part files %v. err :%w", 256 matched, os.ErrNotExist) 257 } 258 259 return nil 260 } 261 262 // findCheckpointPartFiles returns a slice of file full paths of the part files for the checkpoint file 263 // with the given fileName under the given folder. 264 // - it return the matching part files, note it might not contains all the part files. 265 // - it return error if running any exception 266 func findCheckpointPartFiles(dir string, fileName string) ([]string, error) { 267 pattern := filePathPattern(dir, fileName) 268 matched, err := filepath.Glob(pattern) 269 if err != nil { 270 return nil, fmt.Errorf("could not find checkpoint files: %w", err) 271 } 272 273 // build a lookup with matched 274 lookup := make(map[string]struct{}) 275 for _, match := range matched { 276 lookup[match] = struct{}{} 277 } 278 279 headerPath := filePathCheckpointHeader(dir, fileName) 280 parts := make([]string, 0) 281 // check header exists 282 _, ok := lookup[headerPath] 283 if ok { 284 parts = append(parts, headerPath) 285 delete(lookup, headerPath) 286 } 287 288 // check all subtrie parts 289 for i := 0; i < subtrieCount; i++ { 290 subtriePath, _, err := filePathSubTries(dir, fileName, i) 291 if err != nil { 292 return nil, err 293 } 294 _, ok := lookup[subtriePath] 295 if ok { 296 parts = append(parts, subtriePath) 297 delete(lookup, subtriePath) 298 } 299 } 300 301 // check top level trie part file 302 toplevelPath, _ := filePathTopTries(dir, fileName) 303 304 _, ok = lookup[toplevelPath] 305 if ok { 306 parts = append(parts, toplevelPath) 307 delete(lookup, toplevelPath) 308 } 309 310 return parts, nil 311 } 312 313 type jobReadSubtrie struct { 314 Index int 315 Checksum uint32 316 Result chan<- *resultReadSubTrie 317 } 318 319 type resultReadSubTrie struct { 320 Nodes []*node.Node 321 Err error 322 } 323 324 func readSubTriesConcurrently(dir string, fileName string, subtrieChecksums []uint32, logger zerolog.Logger) ([][]*node.Node, error) { 325 326 numOfSubTries := len(subtrieChecksums) 327 jobs := make(chan jobReadSubtrie, numOfSubTries) 328 resultChs := make([]<-chan *resultReadSubTrie, numOfSubTries) 329 330 // push all jobs into the channel 331 for i, checksum := range subtrieChecksums { 332 resultCh := make(chan *resultReadSubTrie) 333 resultChs[i] = resultCh 334 jobs <- jobReadSubtrie{ 335 Index: i, 336 Checksum: checksum, 337 Result: resultCh, 338 } 339 } 340 close(jobs) 341 342 // TODO: make nWorker configable 343 nWorker := numOfSubTries // use as many worker as the jobs to read subtries concurrently 344 for i := 0; i < nWorker; i++ { 345 go func() { 346 for job := range jobs { 347 nodes, err := readCheckpointSubTrie(dir, fileName, job.Index, job.Checksum, logger) 348 job.Result <- &resultReadSubTrie{ 349 Nodes: nodes, 350 Err: err, 351 } 352 close(job.Result) 353 } 354 }() 355 } 356 357 // reading job results in the same order as their indices 358 nodesGroups := make([][]*node.Node, 0, len(resultChs)) 359 for i, resultCh := range resultChs { 360 result := <-resultCh 361 if result.Err != nil { 362 return nil, fmt.Errorf("fail to read %v-th subtrie, trie: %w", i, result.Err) 363 } 364 365 nodesGroups = append(nodesGroups, result.Nodes) 366 } 367 368 return nodesGroups, nil 369 } 370 371 func readCheckpointSubTrie(dir string, fileName string, index int, checksum uint32, logger zerolog.Logger) ( 372 []*node.Node, 373 error, 374 ) { 375 var nodes []*node.Node 376 err := processCheckpointSubTrie(dir, fileName, index, checksum, logger, 377 func(reader *Crc32Reader, nodesCount uint64) error { 378 scratch := make([]byte, 1024*4) // must not be less than 1024 379 380 nodes = make([]*node.Node, nodesCount+1) //+1 for 0 index meaning nil 381 logging := logProgress(fmt.Sprintf("reading %v-th sub trie roots", index), int(nodesCount), logger) 382 for i := uint64(1); i <= nodesCount; i++ { 383 node, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { 384 if nodeIndex >= i { 385 return nil, fmt.Errorf("sequence of serialized nodes does not satisfy Descendents-First-Relationship") 386 } 387 return nodes[nodeIndex], nil 388 }) 389 if err != nil { 390 return fmt.Errorf("cannot read node %d: %w", i, err) 391 } 392 nodes[i] = node 393 logging(i) 394 } 395 return nil 396 }) 397 398 if err != nil { 399 return nil, err 400 } 401 402 // since nodes[0] is always `nil`, returning a slice without nodes[0] could simplify the 403 // implementation of getNodeByIndex 404 // return nodes[1:], nil 405 return nodes[1:], nil 406 } 407 408 // subtrie file contains: 409 // 1. checkpoint version 410 // 2. nodes 411 // 3. node count 412 // 4. checksum 413 func processCheckpointSubTrie( 414 dir string, 415 fileName string, 416 index int, 417 checksum uint32, 418 logger zerolog.Logger, 419 processNode func(*Crc32Reader, uint64) error, 420 ) error { 421 422 filepath, _, err := filePathSubTries(dir, fileName, index) 423 if err != nil { 424 return err 425 } 426 return withFile(logger, filepath, func(f *os.File) error { 427 // valite the magic bytes and version 428 err := validateFileHeader(MagicBytesCheckpointSubtrie, VersionV6, f) 429 if err != nil { 430 return err 431 } 432 433 nodesCount, expectedSum, err := readSubTriesFooter(f) 434 if err != nil { 435 return fmt.Errorf("cannot read sub trie node count: %w", err) 436 } 437 438 if checksum != expectedSum { 439 return fmt.Errorf("mismatch checksum in subtrie file. checksum from checkpoint header %v does not "+ 440 "match with the checksum in subtrie file %v", checksum, expectedSum) 441 } 442 443 // restart from the beginning of the file, make sure Crc32Reader has seen all the bytes 444 // in order to compute the correct checksum 445 _, err = f.Seek(0, io.SeekStart) 446 if err != nil { 447 return fmt.Errorf("cannot seek to start of file: %w", err) 448 } 449 450 reader := NewCRC32Reader(bufio.NewReaderSize(f, defaultBufioReadSize)) 451 452 // read version again for calculating checksum 453 _, _, err = readFileHeader(reader) 454 if err != nil { 455 return fmt.Errorf("could not read version again for subtrie: %w", err) 456 } 457 458 // read file part index and verify 459 460 err = processNode(reader, nodesCount) 461 if err != nil { 462 return err 463 } 464 465 scratch := make([]byte, 1024) 466 // read footer and discard, since we only care about checksum 467 _, err = io.ReadFull(reader, scratch[:encNodeCountSize]) 468 if err != nil { 469 return fmt.Errorf("cannot read footer: %w", err) 470 } 471 472 // calculate the actual checksum 473 actualSum := reader.Crc32() 474 475 if actualSum != expectedSum { 476 return fmt.Errorf("invalid checksum in subtrie checkpoint, expected %v, actual %v", 477 expectedSum, actualSum) 478 } 479 480 // read the checksum and discard, since we only care about whether ensureReachedEOF 481 _, err = io.ReadFull(reader, scratch[:crc32SumSize]) 482 if err != nil { 483 return fmt.Errorf("could not read subtrie file's checksum: %w", err) 484 } 485 486 err = ensureReachedEOF(reader) 487 if err != nil { 488 return fmt.Errorf("fail to read %v-th sutrie file: %w", index, err) 489 } 490 491 return nil 492 }) 493 } 494 495 func readSubTriesFooter(f *os.File) (uint64, uint32, error) { 496 const footerSize = encNodeCountSize // footer doesn't include crc32 sum 497 const footerOffset = footerSize + crc32SumSize 498 _, err := f.Seek(-footerOffset, io.SeekEnd) 499 if err != nil { 500 return 0, 0, fmt.Errorf("cannot seek to footer: %w", err) 501 } 502 503 footer := make([]byte, footerSize) 504 _, err = io.ReadFull(f, footer) 505 if err != nil { 506 return 0, 0, fmt.Errorf("could not read footer: %w", err) 507 } 508 509 nodeCount, err := decodeNodeCount(footer) 510 if err != nil { 511 return 0, 0, fmt.Errorf("could not decode subtrie node count: %w", err) 512 } 513 514 // the subtrie checksum from the checkpoint header file must be same 515 // as the checksum included in the subtrie file 516 expectedSum, err := readCRC32Sum(f) 517 if err != nil { 518 return 0, 0, fmt.Errorf("cannot read checksum for sub trie file: %w", err) 519 } 520 521 return nodeCount, expectedSum, nil 522 } 523 524 // 17th part file contains: 525 // 1. checkpoint version 526 // 2. subtrieNodeCount 527 // 3. top level nodes 528 // 4. trie roots 529 // 5. node count 530 // 6. trie count 531 // 7. checksum 532 func readTopLevelTries(dir string, fileName string, subtrieNodes [][]*node.Node, topTrieChecksum uint32, logger zerolog.Logger) ( 533 rootTriesToReturn []*trie.MTrie, 534 errToReturn error, 535 ) { 536 537 filepath, _ := filePathTopTries(dir, fileName) 538 errToReturn = withFile(logger, filepath, func(file *os.File) error { 539 // read and validate magic bytes and version 540 err := validateFileHeader(MagicBytesCheckpointToptrie, VersionV6, file) 541 if err != nil { 542 return err 543 } 544 545 // read subtrie Node count and validate 546 topLevelNodesCount, triesCount, expectedSum, err := readTopTriesFooter(file) 547 if err != nil { 548 return fmt.Errorf("could not read top tries footer: %w", err) 549 } 550 551 if topTrieChecksum != expectedSum { 552 return fmt.Errorf("mismatch top trie checksum, header file has %v, toptrie file has %v", 553 topTrieChecksum, expectedSum) 554 } 555 556 // restart from the beginning of the file, make sure CRC32Reader has seen all the bytes 557 // in order to compute the correct checksum 558 _, err = file.Seek(0, io.SeekStart) 559 if err != nil { 560 return fmt.Errorf("could not seek to 0: %w", err) 561 } 562 563 reader := NewCRC32Reader(bufio.NewReaderSize(file, defaultBufioReadSize)) 564 565 // read version again for calculating checksum 566 _, _, err = readFileHeader(reader) 567 if err != nil { 568 return fmt.Errorf("could not read version for top trie: %w", err) 569 } 570 571 // read subtrie count and validate 572 buf := make([]byte, encNodeCountSize) 573 _, err = io.ReadFull(reader, buf) 574 if err != nil { 575 return fmt.Errorf("could not read subtrie node count: %w", err) 576 } 577 readSubtrieNodeCount, err := decodeNodeCount(buf) 578 if err != nil { 579 return fmt.Errorf("could not decode node count: %w", err) 580 } 581 582 totalSubTrieNodeCount := computeTotalSubTrieNodeCount(subtrieNodes) 583 584 if readSubtrieNodeCount != totalSubTrieNodeCount { 585 return fmt.Errorf("mismatch subtrie node count, read from disk (%v), but got actual node count (%v)", 586 readSubtrieNodeCount, totalSubTrieNodeCount) 587 } 588 589 topLevelNodes := make([]*node.Node, topLevelNodesCount+1) //+1 for 0 index meaning nil 590 tries := make([]*trie.MTrie, triesCount) 591 592 // Scratch buffer is used as temporary buffer that reader can read into. 593 // Raw data in scratch buffer should be copied or converted into desired 594 // objects before next Read operation. If the scratch buffer isn't large 595 // enough, a new buffer will be allocated. However, 4096 bytes will 596 // be large enough to handle almost all payloads and 100% of interim nodes. 597 scratch := make([]byte, 1024*4) // must not be less than 1024 598 599 // read the nodes from subtrie level to the root level 600 for i := uint64(1); i <= topLevelNodesCount; i++ { 601 node, err := flattener.ReadNode(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { 602 if nodeIndex >= i+uint64(totalSubTrieNodeCount) { 603 return nil, fmt.Errorf("sequence of serialized nodes does not satisfy Descendents-First-Relationship") 604 } 605 606 return getNodeByIndex(subtrieNodes, totalSubTrieNodeCount, topLevelNodes, nodeIndex) 607 }) 608 if err != nil { 609 return fmt.Errorf("cannot read node at index %d: %w", i, err) 610 } 611 612 topLevelNodes[i] = node 613 } 614 615 // read the trie root nodes 616 for i := uint16(0); i < triesCount; i++ { 617 trie, err := flattener.ReadTrie(reader, scratch, func(nodeIndex uint64) (*node.Node, error) { 618 return getNodeByIndex(subtrieNodes, totalSubTrieNodeCount, topLevelNodes, nodeIndex) 619 }) 620 621 if err != nil { 622 return fmt.Errorf("cannot read root trie at index %d: %w", i, err) 623 } 624 tries[i] = trie 625 } 626 627 // read footer and discard, since we only care about checksum 628 _, err = io.ReadFull(reader, scratch[:encNodeCountSize+encTrieCountSize]) 629 if err != nil { 630 return fmt.Errorf("cannot read footer: %w", err) 631 } 632 633 actualSum := reader.Crc32() 634 635 if actualSum != expectedSum { 636 return fmt.Errorf("invalid checksum in top level trie, expected %v, actual %v", 637 expectedSum, actualSum) 638 } 639 640 // read the checksum and discard, since we only care about whether ensureReachedEOF 641 _, err = io.ReadFull(reader, scratch[:crc32SumSize]) 642 if err != nil { 643 return fmt.Errorf("could not read checksum from top trie file: %w", err) 644 } 645 646 err = ensureReachedEOF(reader) 647 if err != nil { 648 return fmt.Errorf("fail to read top trie file: %w", err) 649 } 650 651 rootTriesToReturn = tries 652 return nil 653 }) 654 return rootTriesToReturn, errToReturn 655 } 656 657 func readTriesRootHash(logger zerolog.Logger, dir string, fileName string) ( 658 trieRootsToReturn []ledger.RootHash, 659 errToReturn error, 660 ) { 661 662 filepath, _ := filePathTopTries(dir, fileName) 663 errToReturn = withFile(logger, filepath, func(file *os.File) error { 664 var err error 665 666 // read and validate magic bytes and version 667 err = validateFileHeader(MagicBytesCheckpointToptrie, VersionV6, file) 668 if err != nil { 669 return err 670 } 671 672 // read subtrie Node count and validate 673 _, triesCount, _, err := readTopTriesFooter(file) 674 if err != nil { 675 return fmt.Errorf("could not read top tries footer: %w", err) 676 } 677 678 footerOffset := encNodeCountSize + encTrieCountSize + crc32SumSize 679 trieRootOffset := footerOffset + flattener.EncodedTrieSize*int(triesCount) 680 681 _, err = file.Seek(int64(-trieRootOffset), io.SeekEnd) 682 if err != nil { 683 return fmt.Errorf("could not seek to 0: %w", err) 684 } 685 686 reader := bufio.NewReaderSize(file, defaultBufioReadSize) 687 trieRoots := make([]ledger.RootHash, 0, triesCount) 688 scratch := make([]byte, 1024*4) // must not be less than 1024 689 for i := 0; i < int(triesCount); i++ { 690 trieRootNode, err := flattener.ReadEncodedTrie(reader, scratch) 691 if err != nil { 692 return fmt.Errorf("could not read trie root node: %w", err) 693 } 694 695 trieRoots = append(trieRoots, ledger.RootHash(trieRootNode.RootHash)) 696 } 697 698 trieRootsToReturn = trieRoots 699 return nil 700 }) 701 return trieRootsToReturn, errToReturn 702 } 703 704 // checkpointHasRootHash check if the given checkpoint file contains the expected root hash 705 func checkpointHasRootHash(logger zerolog.Logger, bootstrapDir, filename string, expectedRootHash ledger.RootHash) error { 706 roots, err := ReadTriesRootHash(logger, bootstrapDir, filename) 707 if err != nil { 708 return fmt.Errorf("could not read checkpoint root hash: %w", err) 709 } 710 711 if len(roots) == 0 { 712 return fmt.Errorf("no root hash found in checkpoint file") 713 } 714 715 for i, root := range roots { 716 if root == expectedRootHash { 717 logger.Info().Msgf("found matching checkpoint root hash at index: %v, checkpoint total trie roots: %v", 718 i, len(roots)) 719 // found the expected commit 720 return nil 721 } 722 } 723 724 return fmt.Errorf("could not find expected root hash %v in checkpoint file which contains: %v ", expectedRootHash, roots) 725 } 726 727 func readFileHeader(reader io.Reader) (uint16, uint16, error) { 728 bytes := make([]byte, encMagicSize+encVersionSize) 729 _, err := io.ReadFull(reader, bytes) 730 if err != nil { 731 return 0, 0, fmt.Errorf("cannot read magic ID and version: %w", err) 732 } 733 return decodeVersion(bytes) 734 } 735 736 func validateFileHeader(expectedMagic uint16, expectedVersion uint16, reader io.Reader) error { 737 magic, version, err := readFileHeader(reader) 738 if err != nil { 739 return err 740 } 741 742 if magic != expectedMagic { 743 return fmt.Errorf("wrong magic bytes, expect %#x, bot got: %#x", expectedMagic, magic) 744 } 745 746 if version != expectedVersion { 747 return fmt.Errorf("wrong version, expect %v, bot got: %v", expectedVersion, version) 748 } 749 750 return nil 751 } 752 753 func readSubtrieCount(reader io.Reader) (uint16, error) { 754 bytes := make([]byte, encSubtrieCountSize) 755 _, err := io.ReadFull(reader, bytes) 756 if err != nil { 757 return 0, err 758 } 759 return decodeSubtrieCount(bytes) 760 } 761 762 func readCRC32Sum(reader io.Reader) (uint32, error) { 763 bytes := make([]byte, crc32SumSize) 764 _, err := io.ReadFull(reader, bytes) 765 if err != nil { 766 return 0, err 767 } 768 return decodeCRC32Sum(bytes) 769 } 770 771 func readTopTriesFooter(f *os.File) (uint64, uint16, uint32, error) { 772 // footer offset: nodes count (8 bytes) + tries count (2 bytes) + CRC32 sum (4 bytes) 773 const footerOffset = encNodeCountSize + encTrieCountSize + crc32SumSize 774 const footerSize = encNodeCountSize + encTrieCountSize // footer doesn't include crc32 sum 775 // Seek to footer 776 _, err := f.Seek(-footerOffset, io.SeekEnd) 777 if err != nil { 778 return 0, 0, 0, fmt.Errorf("cannot seek to footer: %w", err) 779 } 780 footer := make([]byte, footerSize) 781 _, err = io.ReadFull(f, footer) 782 if err != nil { 783 return 0, 0, 0, fmt.Errorf("cannot read footer: %w", err) 784 } 785 786 nodeCount, trieCount, err := decodeTopLevelNodesAndTriesFooter(footer) 787 if err != nil { 788 return 0, 0, 0, fmt.Errorf("could not decode top trie footer: %w", err) 789 } 790 791 checksum, err := readCRC32Sum(f) 792 if err != nil { 793 return 0, 0, 0, fmt.Errorf("cannot read checksum for top trie file: %w", err) 794 } 795 return nodeCount, trieCount, checksum, nil 796 } 797 798 func computeTotalSubTrieNodeCount(groups [][]*node.Node) uint64 { 799 total := 0 800 for _, group := range groups { 801 total += len(group) 802 } 803 return uint64(total) 804 } 805 806 // get a node by node index. 807 // Note: node index start from 1. 808 // subtries contains subtrie node groups. subtries[i][0] is NOT nil. 809 // topLevelNodes contains top level nodes. topLevelNodes[0] is nil. 810 // any error returned are exceptions 811 func getNodeByIndex(subtrieNodes [][]*node.Node, totalSubTrieNodeCount uint64, topLevelNodes []*node.Node, index uint64) (*node.Node, error) { 812 if index == 0 { 813 // item at index 0 is for nil 814 return nil, nil 815 } 816 817 if index > totalSubTrieNodeCount { 818 return getTopNodeByIndex(totalSubTrieNodeCount, topLevelNodes, index) 819 } 820 821 offset := index - 1 // index > 0, won't underflow 822 for _, subtries := range subtrieNodes { 823 if int(offset) < len(subtries) { 824 return subtries[offset], nil 825 } 826 827 offset -= uint64(len(subtries)) 828 } 829 830 return nil, fmt.Errorf("could not find node by index %v, totalSubTrieNodeCount %v", index, totalSubTrieNodeCount) 831 } 832 833 func getTopNodeByIndex(totalSubTrieNodeCount uint64, topLevelNodes []*node.Node, index uint64) (*node.Node, error) { 834 nodePos := index - totalSubTrieNodeCount 835 836 if nodePos >= uint64(len(topLevelNodes)) { 837 return nil, fmt.Errorf("can not find node by index %v, nodePos >= len(topLevelNodes) => (%v > %v)", 838 index, nodePos, len(topLevelNodes)) 839 } 840 841 return topLevelNodes[nodePos], nil 842 } 843 844 // ensureReachedEOF checks if the reader has reached end of file 845 // it returns nil if reached EOF 846 // it returns ErrEOFNotReached if didn't reach end of file 847 // any error returned are exception 848 func ensureReachedEOF(reader io.Reader) error { 849 b := make([]byte, 1) 850 _, err := reader.Read(b) 851 if errors.Is(err, io.EOF) { 852 return nil 853 } 854 855 if err == nil { 856 return ErrEOFNotReached 857 } 858 859 return fmt.Errorf("fail to check if reached EOF: %w", err) 860 } 861 862 func validateCheckpointFile(logger zerolog.Logger, dir, fileName string) error { 863 headerPath := filePathCheckpointHeader(dir, fileName) 864 // validate header file 865 subtrieChecksums, topTrieChecksum, err := readCheckpointHeader(headerPath, logger) 866 if err != nil { 867 return err 868 } 869 870 // validate subtrie files 871 for index, expectedSum := range subtrieChecksums { 872 filepath, _, err := filePathSubTries(dir, fileName, index) 873 if err != nil { 874 return err 875 } 876 err = withFile(logger, filepath, func(f *os.File) error { 877 _, checksum, err := readSubTriesFooter(f) 878 if err != nil { 879 return fmt.Errorf("cannot read sub trie node count: %w", err) 880 } 881 882 if checksum != expectedSum { 883 return fmt.Errorf("mismatch checksum in subtrie file. checksum from checkpoint header %v does not "+ 884 "match with the checksum in subtrie file %v", checksum, expectedSum) 885 } 886 return nil 887 }) 888 889 if err != nil { 890 return err 891 } 892 } 893 894 // validate top trie file 895 filepath, _ := filePathTopTries(dir, fileName) 896 err = withFile(logger, filepath, func(file *os.File) error { 897 // read subtrie Node count and validate 898 _, _, checkSum, err := readTopTriesFooter(file) 899 if err != nil { 900 return err 901 } 902 903 if topTrieChecksum != checkSum { 904 return fmt.Errorf("mismatch top trie checksum, header file has %v, toptrie file has %v", 905 topTrieChecksum, checkSum) 906 } 907 908 return nil 909 }) 910 if err != nil { 911 return err 912 } 913 914 return nil 915 }