github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/storage/pebble/bootstrap.go (about) 1 package pebble 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "path/filepath" 8 "time" 9 10 "github.com/cockroachdb/pebble" 11 "github.com/rs/zerolog" 12 "go.uber.org/atomic" 13 "golang.org/x/sync/errgroup" 14 15 "github.com/onflow/flow-go/ledger" 16 "github.com/onflow/flow-go/ledger/common/convert" 17 "github.com/onflow/flow-go/ledger/complete/wal" 18 ) 19 20 // ErrAlreadyBootstrapped is the sentinel error for an already bootstrapped pebble instance 21 var ErrAlreadyBootstrapped = errors.New("found latest key set on badger instance, DB is already bootstrapped") 22 23 type RegisterBootstrap struct { 24 log zerolog.Logger 25 db *pebble.DB 26 checkpointDir string 27 checkpointFileName string 28 leafNodeChan chan *wal.LeafNode 29 rootHeight uint64 30 rootHash ledger.RootHash 31 registerCount *atomic.Uint64 32 } 33 34 // NewRegisterBootstrap creates the bootstrap object for reading checkpoint data and the height tracker in pebble 35 // This object must be initialized and RegisterBootstrap.IndexCheckpointFile must be run to have the pebble db instance 36 // in the correct state to initialize a Registers store. 37 func NewRegisterBootstrap( 38 db *pebble.DB, 39 checkpointFile string, 40 rootHeight uint64, 41 rootHash ledger.RootHash, 42 log zerolog.Logger, 43 ) (*RegisterBootstrap, error) { 44 // check for pre-populated heights, fail if it is populated 45 // i.e. the IndexCheckpointFile function has already run for the db in this directory 46 isBootstrapped, err := IsBootstrapped(db) 47 if err != nil { 48 return nil, err 49 } 50 if isBootstrapped { 51 // key detected, attempt to run bootstrap on corrupt or already bootstrapped data 52 return nil, ErrAlreadyBootstrapped 53 } 54 55 checkpointDir, checkpointFileName := filepath.Split(checkpointFile) 56 return &RegisterBootstrap{ 57 log: log.With().Str("module", "register_bootstrap").Logger(), 58 db: db, 59 checkpointDir: checkpointDir, 60 checkpointFileName: checkpointFileName, 61 leafNodeChan: make(chan *wal.LeafNode, checkpointLeafNodeBufSize), 62 rootHeight: rootHeight, 63 rootHash: rootHash, 64 registerCount: atomic.NewUint64(0), 65 }, nil 66 } 67 68 func (b *RegisterBootstrap) batchIndexRegisters(leafNodes []*wal.LeafNode) error { 69 batch := b.db.NewBatch() 70 defer batch.Close() 71 72 b.log.Trace().Int("batch_size", len(leafNodes)).Msg("indexing batch of leaf nodes") 73 for _, register := range leafNodes { 74 payload := register.Payload 75 key, err := payload.Key() 76 if err != nil { 77 return fmt.Errorf("could not get key from register payload: %w", err) 78 } 79 80 registerID, err := convert.LedgerKeyToRegisterID(key) 81 if err != nil { 82 return fmt.Errorf("could not get register ID from key: %w", err) 83 } 84 85 encoded := newLookupKey(b.rootHeight, registerID).Bytes() 86 err = batch.Set(encoded, payload.Value(), nil) 87 if err != nil { 88 return fmt.Errorf("failed to set key: %w", err) 89 } 90 } 91 92 err := batch.Commit(pebble.Sync) 93 if err != nil { 94 return fmt.Errorf("failed to commit batch: %w", err) 95 } 96 97 b.registerCount.Add(uint64(len(leafNodes))) 98 99 return nil 100 } 101 102 // indexCheckpointFileWorker asynchronously indexes register entries in b.checkpointDir 103 // with wal.OpenAndReadLeafNodesFromCheckpointV6 104 func (b *RegisterBootstrap) indexCheckpointFileWorker(ctx context.Context) error { 105 b.log.Debug().Msg("started checkpoint index worker") 106 107 // collect leaf nodes to batch index until the channel is closed 108 batch := make([]*wal.LeafNode, 0, pebbleBootstrapRegisterBatchLen) 109 for leafNode := range b.leafNodeChan { 110 select { 111 case <-ctx.Done(): 112 return nil 113 default: 114 batch = append(batch, leafNode) 115 if len(batch) >= pebbleBootstrapRegisterBatchLen { 116 err := b.batchIndexRegisters(batch) 117 if err != nil { 118 return fmt.Errorf("unable to index registers to pebble in batch: %w", err) 119 } 120 batch = make([]*wal.LeafNode, 0, pebbleBootstrapRegisterBatchLen) 121 } 122 } 123 } 124 125 // index the remaining registers if didn't reach a batch length. 126 err := b.batchIndexRegisters(batch) 127 if err != nil { 128 return fmt.Errorf("unable to index remaining registers to pebble: %w", err) 129 } 130 return nil 131 } 132 133 // IndexCheckpointFile indexes the checkpoint file in the Dir provided 134 func (b *RegisterBootstrap) IndexCheckpointFile(ctx context.Context, workerCount int) error { 135 cct, cancel := context.WithCancel(ctx) 136 defer cancel() 137 138 // validate the checkpoint has correct root hash 139 err := wal.CheckpointHasRootHash(b.log, b.checkpointDir, b.checkpointFileName, b.rootHash) 140 if err != nil { 141 return fmt.Errorf("the root checkpoint to have the trie root hash %v does not match with the root state commitment: %w", b.rootHash, err) 142 } 143 144 g, gCtx := errgroup.WithContext(cct) 145 146 start := time.Now() 147 b.log.Info().Msgf("indexing registers from checkpoint with %v worker", workerCount) 148 for i := 0; i < workerCount; i++ { 149 g.Go(func() error { 150 return b.indexCheckpointFileWorker(gCtx) 151 }) 152 } 153 154 err = wal.OpenAndReadLeafNodesFromCheckpointV6(b.leafNodeChan, b.checkpointDir, b.checkpointFileName, b.log) 155 if err != nil { 156 return fmt.Errorf("error reading leaf node: %w", err) 157 } 158 159 if err = g.Wait(); err != nil { 160 return fmt.Errorf("failed to index checkpoint file: %w", err) 161 } 162 163 err = initHeights(b.db, b.rootHeight) 164 if err != nil { 165 return fmt.Errorf("could not index latest height: %w", err) 166 } 167 168 b.log.Info(). 169 Uint64("root_height", b.rootHeight). 170 Uint64("register_count", b.registerCount.Load()). 171 // note: not using Dur() since default units are ms and this duration is long 172 Str("duration", fmt.Sprintf("%v", time.Since(start))). 173 Msg("checkpoint indexing complete") 174 175 return nil 176 }