github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/storage/pebble/bootstrap.go (about)

     1  package pebble
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"path/filepath"
     8  	"time"
     9  
    10  	"github.com/cockroachdb/pebble"
    11  	"github.com/rs/zerolog"
    12  	"go.uber.org/atomic"
    13  	"golang.org/x/sync/errgroup"
    14  
    15  	"github.com/onflow/flow-go/ledger"
    16  	"github.com/onflow/flow-go/ledger/common/convert"
    17  	"github.com/onflow/flow-go/ledger/complete/wal"
    18  )
    19  
    20  // ErrAlreadyBootstrapped is the sentinel error for an already bootstrapped pebble instance
    21  var ErrAlreadyBootstrapped = errors.New("found latest key set on badger instance, DB is already bootstrapped")
    22  
    23  type RegisterBootstrap struct {
    24  	log                zerolog.Logger
    25  	db                 *pebble.DB
    26  	checkpointDir      string
    27  	checkpointFileName string
    28  	leafNodeChan       chan *wal.LeafNode
    29  	rootHeight         uint64
    30  	rootHash           ledger.RootHash
    31  	registerCount      *atomic.Uint64
    32  }
    33  
    34  // NewRegisterBootstrap creates the bootstrap object for reading checkpoint data and the height tracker in pebble
    35  // This object must be initialized and RegisterBootstrap.IndexCheckpointFile must be run to have the pebble db instance
    36  // in the correct state to initialize a Registers store.
    37  func NewRegisterBootstrap(
    38  	db *pebble.DB,
    39  	checkpointFile string,
    40  	rootHeight uint64,
    41  	rootHash ledger.RootHash,
    42  	log zerolog.Logger,
    43  ) (*RegisterBootstrap, error) {
    44  	// check for pre-populated heights, fail if it is populated
    45  	// i.e. the IndexCheckpointFile function has already run for the db in this directory
    46  	isBootstrapped, err := IsBootstrapped(db)
    47  	if err != nil {
    48  		return nil, err
    49  	}
    50  	if isBootstrapped {
    51  		// key detected, attempt to run bootstrap on corrupt or already bootstrapped data
    52  		return nil, ErrAlreadyBootstrapped
    53  	}
    54  
    55  	checkpointDir, checkpointFileName := filepath.Split(checkpointFile)
    56  	return &RegisterBootstrap{
    57  		log:                log.With().Str("module", "register_bootstrap").Logger(),
    58  		db:                 db,
    59  		checkpointDir:      checkpointDir,
    60  		checkpointFileName: checkpointFileName,
    61  		leafNodeChan:       make(chan *wal.LeafNode, checkpointLeafNodeBufSize),
    62  		rootHeight:         rootHeight,
    63  		rootHash:           rootHash,
    64  		registerCount:      atomic.NewUint64(0),
    65  	}, nil
    66  }
    67  
    68  func (b *RegisterBootstrap) batchIndexRegisters(leafNodes []*wal.LeafNode) error {
    69  	batch := b.db.NewBatch()
    70  	defer batch.Close()
    71  
    72  	b.log.Trace().Int("batch_size", len(leafNodes)).Msg("indexing batch of leaf nodes")
    73  	for _, register := range leafNodes {
    74  		payload := register.Payload
    75  		key, err := payload.Key()
    76  		if err != nil {
    77  			return fmt.Errorf("could not get key from register payload: %w", err)
    78  		}
    79  
    80  		registerID, err := convert.LedgerKeyToRegisterID(key)
    81  		if err != nil {
    82  			return fmt.Errorf("could not get register ID from key: %w", err)
    83  		}
    84  
    85  		encoded := newLookupKey(b.rootHeight, registerID).Bytes()
    86  		err = batch.Set(encoded, payload.Value(), nil)
    87  		if err != nil {
    88  			return fmt.Errorf("failed to set key: %w", err)
    89  		}
    90  	}
    91  
    92  	err := batch.Commit(pebble.Sync)
    93  	if err != nil {
    94  		return fmt.Errorf("failed to commit batch: %w", err)
    95  	}
    96  
    97  	b.registerCount.Add(uint64(len(leafNodes)))
    98  
    99  	return nil
   100  }
   101  
   102  // indexCheckpointFileWorker asynchronously indexes register entries in b.checkpointDir
   103  // with wal.OpenAndReadLeafNodesFromCheckpointV6
   104  func (b *RegisterBootstrap) indexCheckpointFileWorker(ctx context.Context) error {
   105  	b.log.Debug().Msg("started checkpoint index worker")
   106  
   107  	// collect leaf nodes to batch index until the channel is closed
   108  	batch := make([]*wal.LeafNode, 0, pebbleBootstrapRegisterBatchLen)
   109  	for leafNode := range b.leafNodeChan {
   110  		select {
   111  		case <-ctx.Done():
   112  			return nil
   113  		default:
   114  			batch = append(batch, leafNode)
   115  			if len(batch) >= pebbleBootstrapRegisterBatchLen {
   116  				err := b.batchIndexRegisters(batch)
   117  				if err != nil {
   118  					return fmt.Errorf("unable to index registers to pebble in batch: %w", err)
   119  				}
   120  				batch = make([]*wal.LeafNode, 0, pebbleBootstrapRegisterBatchLen)
   121  			}
   122  		}
   123  	}
   124  
   125  	// index the remaining registers if didn't reach a batch length.
   126  	err := b.batchIndexRegisters(batch)
   127  	if err != nil {
   128  		return fmt.Errorf("unable to index remaining registers to pebble: %w", err)
   129  	}
   130  	return nil
   131  }
   132  
   133  // IndexCheckpointFile indexes the checkpoint file in the Dir provided
   134  func (b *RegisterBootstrap) IndexCheckpointFile(ctx context.Context, workerCount int) error {
   135  	cct, cancel := context.WithCancel(ctx)
   136  	defer cancel()
   137  
   138  	// validate the checkpoint has correct root hash
   139  	err := wal.CheckpointHasRootHash(b.log, b.checkpointDir, b.checkpointFileName, b.rootHash)
   140  	if err != nil {
   141  		return fmt.Errorf("the root checkpoint to have the trie root hash %v does not match with the root state commitment: %w", b.rootHash, err)
   142  	}
   143  
   144  	g, gCtx := errgroup.WithContext(cct)
   145  
   146  	start := time.Now()
   147  	b.log.Info().Msgf("indexing registers from checkpoint with %v worker", workerCount)
   148  	for i := 0; i < workerCount; i++ {
   149  		g.Go(func() error {
   150  			return b.indexCheckpointFileWorker(gCtx)
   151  		})
   152  	}
   153  
   154  	err = wal.OpenAndReadLeafNodesFromCheckpointV6(b.leafNodeChan, b.checkpointDir, b.checkpointFileName, b.log)
   155  	if err != nil {
   156  		return fmt.Errorf("error reading leaf node: %w", err)
   157  	}
   158  
   159  	if err = g.Wait(); err != nil {
   160  		return fmt.Errorf("failed to index checkpoint file: %w", err)
   161  	}
   162  
   163  	err = initHeights(b.db, b.rootHeight)
   164  	if err != nil {
   165  		return fmt.Errorf("could not index latest height: %w", err)
   166  	}
   167  
   168  	b.log.Info().
   169  		Uint64("root_height", b.rootHeight).
   170  		Uint64("register_count", b.registerCount.Load()).
   171  		// note: not using Dur() since default units are ms and this duration is long
   172  		Str("duration", fmt.Sprintf("%v", time.Since(start))).
   173  		Msg("checkpoint indexing complete")
   174  
   175  	return nil
   176  }