github.com/koko1123/flow-go-1@v0.29.6/module/builder/collection/builder.go (about)

     1  package collection
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"math"
     8  	"time"
     9  
    10  	"github.com/dgraph-io/badger/v3"
    11  	"github.com/rs/zerolog"
    12  	otelTrace "go.opentelemetry.io/otel/trace"
    13  
    14  	"github.com/koko1123/flow-go-1/model/cluster"
    15  	"github.com/koko1123/flow-go-1/model/flow"
    16  	"github.com/koko1123/flow-go-1/module"
    17  	"github.com/koko1123/flow-go-1/module/mempool"
    18  	"github.com/koko1123/flow-go-1/module/trace"
    19  	"github.com/koko1123/flow-go-1/state/fork"
    20  	"github.com/koko1123/flow-go-1/storage"
    21  	"github.com/koko1123/flow-go-1/storage/badger/operation"
    22  	"github.com/koko1123/flow-go-1/storage/badger/procedure"
    23  	"github.com/koko1123/flow-go-1/utils/logging"
    24  )
    25  
// Builder is the builder for collection block payloads. Upon providing a
// payload hash, it also memorizes the payload contents.
//
// NOTE: Builder is NOT safe for use with multiple goroutines. Since the
// HotStuff event loop is the only consumer of this interface and is single
// threaded, this is OK.
type Builder struct {
	db             *badger.DB              // backing store; used for read (View) and write (Update) transactions
	mainHeaders    storage.Headers         // headers on the main (reference) chain, for resolving tx reference blocks
	clusterHeaders storage.Headers         // headers on this cluster's chain, for ancestry traversal
	payloads       storage.ClusterPayloads // payloads of cluster blocks, keyed by block ID
	transactions   mempool.Transactions    // pool of candidate transactions to include in proposals
	tracer         module.Tracer           // tracer for span instrumentation of the build process
	config         Config                  // builder configuration (size/gas limits, expiry buffer, rate limiting)
	log            zerolog.Logger          // logger scoped with component=cluster_builder
}
    42  
    43  func NewBuilder(db *badger.DB, tracer module.Tracer, mainHeaders storage.Headers, clusterHeaders storage.Headers, payloads storage.ClusterPayloads, transactions mempool.Transactions, log zerolog.Logger, opts ...Opt) (*Builder, error) {
    44  
    45  	b := Builder{
    46  		db:             db,
    47  		tracer:         tracer,
    48  		mainHeaders:    mainHeaders,
    49  		clusterHeaders: clusterHeaders,
    50  		payloads:       payloads,
    51  		transactions:   transactions,
    52  		config:         DefaultConfig(),
    53  		log:            log.With().Str("component", "cluster_builder").Logger(),
    54  	}
    55  
    56  	for _, apply := range opts {
    57  		apply(&b.config)
    58  	}
    59  
    60  	// sanity check config
    61  	if b.config.ExpiryBuffer >= flow.DefaultTransactionExpiry {
    62  		return nil, fmt.Errorf("invalid configured expiry buffer exceeds tx expiry (%d > %d)", b.config.ExpiryBuffer, flow.DefaultTransactionExpiry)
    63  	}
    64  
    65  	return &b, nil
    66  }
    67  
// BuildOn creates a new block built on the given parent. It produces a payload
// that is valid with respect to the un-finalized chain it extends.
//
// Parameters:
//   - parentID: ID of the cluster block to build on top of
//   - setter: callback provided by the consensus algorithm (HotStuff) which
//     populates the consensus-related header fields (view, signatures, etc.)
//
// Returns the header of the newly built (and persisted) proposal, or an error
// if any storage lookup, payload construction, or persistence step fails.
func (b *Builder) BuildOn(parentID flow.Identifier, setter func(*flow.Header) error) (*flow.Header, error) {
	var proposal cluster.Block                 // proposal we are building
	var parent flow.Header                     // parent of the proposal we are building
	var clusterChainFinalizedBlock flow.Header // finalized block on the cluster chain
	var refChainFinalizedHeight uint64         // finalized height on reference chain
	var refChainFinalizedID flow.Identifier    // finalized block ID on reference chain

	// captured now so the tracing span (started near the end) reflects the
	// full build duration
	startTime := time.Now()

	// STEP ONE: build a lookup for excluding duplicated transactions.
	// This is briefly how it works:
	//
	// Let E be the global transaction expiry.
	// When incorporating a new collection C, with reference height R, we enforce
	// that it contains only transactions with reference heights in [R,R+E).
	// * if we are building C:
	//   * we don't build expired collections (ie. our local finalized consensus height is at most R+E-1)
	//   * we don't include transactions referencing un-finalized blocks
	//   * therefore, C will contain only transactions with reference heights in [R,R+E)
	// * if we are validating C:
	//   * honest validators only consider C valid if all its transactions have reference heights in [R,R+E)
	//
	// Therefore, to check for duplicates, we only need a lookup for transactions in collection
	// with expiry windows that overlap with our collection under construction.
	//
	// A collection with overlapping expiry window can be finalized or un-finalized.
	// * to find all non-expired and finalized collections, we make use of an index
	//   (main_chain_finalized_height -> cluster_block_ids with respective reference height),
	//   to search for a range of main chain heights which could be only referenced by
	//   collections with overlapping expiry windows.
	// * to find all overlapping and un-finalized collections, we can't use the above index, because it's
	//   only for finalized collections. Instead, we simply traverse along the chain up to the last
	//   finalized block. This could possibly include some collections with expiry windows that DON'T
	//   overlap with our collection under construction, but it is unlikely and doesn't impact correctness.
	//
	// After combining both the finalized and un-finalized cluster blocks that overlap with our expiry window,
	// we can iterate through their transactions, and build a lookup for excluding duplicated transactions.
	err := b.db.View(func(btx *badger.Txn) error {

		// TODO (ramtin): enable this again
		// b.tracer.StartSpan(parentID, trace.COLBuildOnSetup)
		// defer b.tracer.FinishSpan(parentID, trace.COLBuildOnSetup)

		err := operation.RetrieveHeader(parentID, &parent)(btx)
		if err != nil {
			return fmt.Errorf("could not retrieve parent: %w", err)
		}

		// retrieve the height and ID of the latest finalized block ON THE MAIN CHAIN
		// this is used as the reference point for transaction expiry
		err = operation.RetrieveFinalizedHeight(&refChainFinalizedHeight)(btx)
		if err != nil {
			return fmt.Errorf("could not retrieve main finalized height: %w", err)
		}
		err = operation.LookupBlockHeight(refChainFinalizedHeight, &refChainFinalizedID)(btx)
		if err != nil {
			return fmt.Errorf("could not retrieve main finalized ID: %w", err)
		}

		// retrieve the finalized boundary ON THE CLUSTER CHAIN
		err = procedure.RetrieveLatestFinalizedClusterHeader(parent.ChainID, &clusterChainFinalizedBlock)(btx)
		if err != nil {
			return fmt.Errorf("could not retrieve cluster final: %w", err)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}

	// pre-compute the minimum possible reference block height for transactions
	// included in this collection (actual reference height may be greater)
	minPossibleRefHeight := refChainFinalizedHeight - uint64(flow.DefaultTransactionExpiry-b.config.ExpiryBuffer)
	if minPossibleRefHeight > refChainFinalizedHeight {
		minPossibleRefHeight = 0 // overflow check
	}

	log := b.log.With().
		Hex("parent_id", parentID[:]).
		Str("chain_id", parent.ChainID.String()).
		Uint64("final_ref_height", refChainFinalizedHeight).
		Logger()

	log.Debug().Msg("building new cluster block")

	// TODO (ramtin): enable this again
	// b.tracer.FinishSpan(parentID, trace.COLBuildOnSetup)
	// b.tracer.StartSpan(parentID, trace.COLBuildOnUnfinalizedLookup)
	// defer b.tracer.FinishSpan(parentID, trace.COLBuildOnUnfinalizedLookup)

	// STEP TWO: create a lookup of all previously used transactions on the
	// part of the chain we care about. We do this separately for
	// un-finalized and finalized sections of the chain to decide whether to
	// remove conflicting transactions from the mempool.

	// keep track of transactions in the ancestry to avoid duplicates
	lookup := newTransactionLookup()
	// keep track of transactions to enforce rate limiting
	limiter := newRateLimiter(b.config, parent.Height+1)

	// RATE LIMITING: the builder module can be configured to limit the
	// rate at which transactions with a common payer are included in
	// blocks. Depending on the configured limit, we either allow 1
	// transaction every N sequential collections, or we allow K transactions
	// per collection.

	// first, look up previously included transactions in UN-FINALIZED ancestors
	err = b.populateUnfinalizedAncestryLookup(parentID, clusterChainFinalizedBlock.Height, lookup, limiter)
	if err != nil {
		return nil, fmt.Errorf("could not populate un-finalized ancestry lookout (parent_id=%x): %w", parentID, err)
	}

	// TODO (ramtin): enable this again
	// b.tracer.FinishSpan(parentID, trace.COLBuildOnUnfinalizedLookup)
	// b.tracer.StartSpan(parentID, trace.COLBuildOnFinalizedLookup)
	// defer b.tracer.FinishSpan(parentID, trace.COLBuildOnFinalizedLookup)

	// second, look up previously included transactions in FINALIZED ancestors
	err = b.populateFinalizedAncestryLookup(minPossibleRefHeight, refChainFinalizedHeight, lookup, limiter)
	if err != nil {
		return nil, fmt.Errorf("could not populate finalized ancestry lookup: %w", err)
	}

	// TODO (ramtin): enable this again
	// b.tracer.FinishSpan(parentID, trace.COLBuildOnFinalizedLookup)
	// b.tracer.StartSpan(parentID, trace.COLBuildOnCreatePayload)
	// defer b.tracer.FinishSpan(parentID, trace.COLBuildOnCreatePayload)

	// STEP THREE: build a payload of valid transactions, while at the same
	// time figuring out the correct reference block ID for the collection.

	// keep track of the actual smallest reference height of all included transactions;
	// if no transaction is included, minRefID falls back to the latest finalized
	// block on the reference chain
	minRefHeight := uint64(math.MaxUint64)
	minRefID := refChainFinalizedID

	var transactions []*flow.TransactionBody
	var totalByteSize uint64
	var totalGas uint64
	for _, tx := range b.transactions.All() {

		// if we have reached maximum number of transactions, stop
		if uint(len(transactions)) >= b.config.MaxCollectionSize {
			break
		}

		txByteSize := uint64(tx.ByteSize())
		// ignore transactions with tx byte size bigger that the max amount per collection
		// this case shouldn't happen ever since we keep a limit on tx byte size but in case
		// we keep this condition
		if txByteSize > b.config.MaxCollectionByteSize {
			continue
		}

		// because the max byte size per tx is way smaller than the max collection byte size, we can stop here and not continue.
		// to make it more effective in the future we can continue adding smaller ones
		if totalByteSize+txByteSize > b.config.MaxCollectionByteSize {
			break
		}

		// ignore transactions with max gas bigger that the max total gas per collection
		// this case shouldn't happen ever but in case we keep this condition
		if tx.GasLimit > b.config.MaxCollectionTotalGas {
			continue
		}

		// cause the max gas limit per tx is way smaller than the total max gas per collection, we can stop here and not continue.
		// to make it more effective in the future we can continue adding smaller ones
		if totalGas+tx.GasLimit > b.config.MaxCollectionTotalGas {
			break
		}

		// retrieve the main chain header that was used as reference
		refHeader, err := b.mainHeaders.ByBlockID(tx.ReferenceBlockID)
		if errors.Is(err, storage.ErrNotFound) {
			continue // in case we are configured with liberal transaction ingest rules
		}
		if err != nil {
			return nil, fmt.Errorf("could not retrieve reference header: %w", err)
		}

		// disallow un-finalized reference blocks
		if refChainFinalizedHeight < refHeader.Height {
			continue
		}
		// make sure the reference block is finalized and not orphaned
		blockFinalizedAtReferenceHeight, err := b.mainHeaders.ByHeight(refHeader.Height)
		if err != nil {
			return nil, fmt.Errorf("could not check that reference block (id=%x) is finalized: %w", tx.ReferenceBlockID, err)
		}
		if blockFinalizedAtReferenceHeight.ID() != tx.ReferenceBlockID {
			// the transaction references an orphaned block - it will never be valid
			b.transactions.Remove(tx.ID())
			continue
		}

		// ensure the reference block is not too old
		if refHeader.Height < minPossibleRefHeight {
			// the transaction is expired, it will never be valid
			b.transactions.Remove(tx.ID())
			continue
		}

		txID := tx.ID()
		// check that the transaction was not already used in un-finalized history
		// (kept in the mempool - it may become valid on another fork)
		if lookup.isUnfinalizedAncestor(txID) {
			continue
		}

		// check that the transaction was not already included in finalized history.
		if lookup.isFinalizedAncestor(txID) {
			// remove from mempool, conflicts with finalized block will never be valid
			b.transactions.Remove(txID)
			continue
		}

		// enforce rate limiting rules
		if limiter.shouldRateLimit(tx) {
			if b.config.DryRunRateLimit {
				// log that this transaction would have been rate-limited, but we will still include it in the collection
				b.log.Info().
					Hex("tx_id", logging.ID(txID)).
					Str("payer_addr", tx.Payer.String()).
					Float64("rate_limit", b.config.MaxPayerTransactionRate).
					Msg("dry-run: observed transaction that would have been rate limited")
			} else {
				b.log.Debug().
					Hex("tx_id", logging.ID(txID)).
					Str("payer_addr", tx.Payer.String()).
					Float64("rate_limit", b.config.MaxPayerTransactionRate).
					Msg("transaction is rate-limited")
				continue
			}
		}

		// ensure we find the lowest reference block height
		if refHeader.Height < minRefHeight {
			minRefHeight = refHeader.Height
			minRefID = tx.ReferenceBlockID
		}

		// update per-payer transaction count
		limiter.transactionIncluded(tx)

		transactions = append(transactions, tx)
		totalByteSize += txByteSize
		totalGas += tx.GasLimit
	}

	// STEP FOUR: we have a set of transactions that are valid to include
	// on this fork. Now we need to create the collection that will be
	// used in the payload and construct the final proposal model
	// TODO (ramtin): enable this again
	// b.tracer.FinishSpan(parentID, trace.COLBuildOnCreatePayload)
	// b.tracer.StartSpan(parentID, trace.COLBuildOnCreateHeader)
	// defer b.tracer.FinishSpan(parentID, trace.COLBuildOnCreateHeader)

	// build the payload from the transactions
	payload := cluster.PayloadFromTransactions(minRefID, transactions...)

	header := &flow.Header{
		ChainID:     parent.ChainID,
		ParentID:    parentID,
		Height:      parent.Height + 1,
		PayloadHash: payload.Hash(),
		Timestamp:   time.Now().UTC(),

		// NOTE: we rely on the HotStuff-provided setter to set the other
		// fields, which are related to signatures and HotStuff internals
	}

	// set fields specific to the consensus algorithm
	err = setter(header)
	if err != nil {
		return nil, fmt.Errorf("could not set fields to header: %w", err)
	}

	proposal = cluster.Block{
		Header:  header,
		Payload: &payload,
	}

	// TODO (ramtin): enable this again
	// b.tracer.FinishSpan(parentID, trace.COLBuildOnCreateHeader)

	// the collection span covers the whole build, timestamped from startTime
	span, ctx := b.tracer.StartCollectionSpan(context.Background(), proposal.ID(), trace.COLBuildOn, otelTrace.WithTimestamp(startTime))
	defer span.End()

	dbInsertSpan, _ := b.tracer.StartSpanFromContext(ctx, trace.COLBuildOnDBInsert)
	defer dbInsertSpan.End()

	// finally we insert the block in a write transaction
	err = operation.RetryOnConflict(b.db.Update, procedure.InsertClusterBlock(&proposal))
	if err != nil {
		return nil, fmt.Errorf("could not insert built block: %w", err)
	}

	return proposal.Header, nil
}
   366  
   367  // populateUnfinalizedAncestryLookup traverses the unfinalized ancestry backward
   368  // to populate the transaction lookup (used for deduplication) and the rate limiter
   369  // (used to limit transaction submission by payer).
   370  //
   371  // The traversal begins with the block specified by parentID (the block we are
   372  // building on top of) and ends with the oldest unfinalized block in the ancestry.
   373  func (b *Builder) populateUnfinalizedAncestryLookup(parentID flow.Identifier, finalHeight uint64, lookup *transactionLookup, limiter *rateLimiter) error {
   374  
   375  	err := fork.TraverseBackward(b.clusterHeaders, parentID, func(ancestor *flow.Header) error {
   376  		payload, err := b.payloads.ByBlockID(ancestor.ID())
   377  		if err != nil {
   378  			return fmt.Errorf("could not retrieve ancestor payload: %w", err)
   379  		}
   380  
   381  		for _, tx := range payload.Collection.Transactions {
   382  			lookup.addUnfinalizedAncestor(tx.ID())
   383  			limiter.addAncestor(ancestor.Height, tx)
   384  		}
   385  		return nil
   386  	}, fork.ExcludingHeight(finalHeight))
   387  
   388  	return err
   389  }
   390  
   391  // populateFinalizedAncestryLookup traverses the reference block height index to
   392  // populate the transaction lookup (used for deduplication) and the rate limiter
   393  // (used to limit transaction submission by payer).
   394  //
   395  // The traversal is structured so that we check every collection whose reference
   396  // block height translates to a possible constituent transaction which could also
   397  // appear in the collection we are building.
   398  func (b *Builder) populateFinalizedAncestryLookup(minRefHeight, maxRefHeight uint64, lookup *transactionLookup, limiter *rateLimiter) error {
   399  
   400  	// Let E be the global transaction expiry constant, measured in blocks. For each
   401  	// T ∈ `includedTransactions`, we have to decide whether the transaction
   402  	// already appeared in _any_ finalized cluster block.
   403  	// Notation:
   404  	//   - consider a valid cluster block C and let c be its reference block height
   405  	//   - consider a transaction T ∈ `includedTransactions` and let t denote its
   406  	//     reference block height
   407  	//
   408  	// Boundary conditions:
   409  	// 1. C's reference block height is equal to the lowest reference block height of
   410  	//    all its constituent transactions. Hence, for collection C to potentially contain T, it must satisfy c <= t.
   411  	// 2. For T to be eligible for inclusion in collection C, _none_ of the transactions within C are allowed
   412  	// to be expired w.r.t. C's reference block. Hence, for collection C to potentially contain T, it must satisfy t < c + E.
   413  	//
   414  	// Therefore, for collection C to potentially contain transaction T, it must satisfy t - E < c <= t.
   415  	// In other words, we only need to inspect collections with reference block height c ∈ (t-E, t].
   416  	// Consequently, for a set of transactions, with `minRefHeight` (`maxRefHeight`) being the smallest (largest)
   417  	// reference block height, we only need to inspect collections with c ∈ (minRefHeight-E, maxRefHeight].
   418  
   419  	// the finalized cluster blocks which could possibly contain any conflicting transactions
   420  	var clusterBlockIDs []flow.Identifier
   421  	start, end := findRefHeightSearchRangeForConflictingClusterBlocks(minRefHeight, maxRefHeight)
   422  	err := b.db.View(operation.LookupClusterBlocksByReferenceHeightRange(start, end, &clusterBlockIDs))
   423  	if err != nil {
   424  		return fmt.Errorf("could not lookup finalized cluster blocks by reference height range [%d,%d]: %w", start, end, err)
   425  	}
   426  
   427  	for _, blockID := range clusterBlockIDs {
   428  		header, err := b.clusterHeaders.ByBlockID(blockID)
   429  		if err != nil {
   430  			return fmt.Errorf("could not retrieve cluster header (id=%x): %w", blockID, err)
   431  		}
   432  		payload, err := b.payloads.ByBlockID(blockID)
   433  		if err != nil {
   434  			return fmt.Errorf("could not retrieve cluster payload (block_id=%x): %w", blockID, err)
   435  		}
   436  		for _, tx := range payload.Collection.Transactions {
   437  			lookup.addFinalizedAncestor(tx.ID())
   438  			limiter.addAncestor(header.Height, tx)
   439  		}
   440  	}
   441  
   442  	return nil
   443  }
   444  
   445  // findRefHeightSearchRangeForConflictingClusterBlocks computes the range of reference
   446  // block heights of ancestor blocks which could possibly contain transactions
   447  // duplicating those in our collection under construction, based on the range of
   448  // reference heights of transactions in the collection under construction.
   449  //
   450  // Input range is the (inclusive) range of reference heights of transactions included
   451  // in the collection under construction. Output range is the (inclusive) range of
   452  // reference heights which need to be searched.
   453  func findRefHeightSearchRangeForConflictingClusterBlocks(minRefHeight, maxRefHeight uint64) (start, end uint64) {
   454  	start = minRefHeight - flow.DefaultTransactionExpiry + 1
   455  	if start > minRefHeight {
   456  		start = 0 // overflow check
   457  	}
   458  	end = maxRefHeight
   459  	return start, end
   460  }