github.com/koko1123/flow-go-1@v0.29.6/module/builder/collection/builder.go (about) 1 package collection 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "math" 8 "time" 9 10 "github.com/dgraph-io/badger/v3" 11 "github.com/rs/zerolog" 12 otelTrace "go.opentelemetry.io/otel/trace" 13 14 "github.com/koko1123/flow-go-1/model/cluster" 15 "github.com/koko1123/flow-go-1/model/flow" 16 "github.com/koko1123/flow-go-1/module" 17 "github.com/koko1123/flow-go-1/module/mempool" 18 "github.com/koko1123/flow-go-1/module/trace" 19 "github.com/koko1123/flow-go-1/state/fork" 20 "github.com/koko1123/flow-go-1/storage" 21 "github.com/koko1123/flow-go-1/storage/badger/operation" 22 "github.com/koko1123/flow-go-1/storage/badger/procedure" 23 "github.com/koko1123/flow-go-1/utils/logging" 24 ) 25 26 // Builder is the builder for collection block payloads. Upon providing a 27 // payload hash, it also memorizes the payload contents. 28 // 29 // NOTE: Builder is NOT safe for use with multiple goroutines. Since the 30 // HotStuff event loop is the only consumer of this interface and is single 31 // threaded, this is OK. 32 type Builder struct { 33 db *badger.DB 34 mainHeaders storage.Headers 35 clusterHeaders storage.Headers 36 payloads storage.ClusterPayloads 37 transactions mempool.Transactions 38 tracer module.Tracer 39 config Config 40 log zerolog.Logger 41 } 42 43 func NewBuilder(db *badger.DB, tracer module.Tracer, mainHeaders storage.Headers, clusterHeaders storage.Headers, payloads storage.ClusterPayloads, transactions mempool.Transactions, log zerolog.Logger, opts ...Opt) (*Builder, error) { 44 45 b := Builder{ 46 db: db, 47 tracer: tracer, 48 mainHeaders: mainHeaders, 49 clusterHeaders: clusterHeaders, 50 payloads: payloads, 51 transactions: transactions, 52 config: DefaultConfig(), 53 log: log.With().Str("component", "cluster_builder").Logger(), 54 } 55 56 for _, apply := range opts { 57 apply(&b.config) 58 } 59 60 // sanity check config 61 if b.config.ExpiryBuffer >= flow.DefaultTransactionExpiry { 62 return nil, fmt.Errorf("invalid configured expiry buffer exceeds tx expiry (%d > %d)", b.config.ExpiryBuffer, flow.DefaultTransactionExpiry) 63 } 64 65 return &b, nil 66 } 67 68 // BuildOn creates a new block built on the given parent. It produces a payload 69 // that is valid with respect to the un-finalized chain it extends. 70 func (b *Builder) BuildOn(parentID flow.Identifier, setter func(*flow.Header) error) (*flow.Header, error) { 71 var proposal cluster.Block // proposal we are building 72 var parent flow.Header // parent of the proposal we are building 73 var clusterChainFinalizedBlock flow.Header // finalized block on the cluster chain 74 var refChainFinalizedHeight uint64 // finalized height on reference chain 75 var refChainFinalizedID flow.Identifier // finalized block ID on reference chain 76 77 startTime := time.Now() 78 79 // STEP ONE: build a lookup for excluding duplicated transactions. 80 // This is briefly how it works: 81 // 82 // Let E be the global transaction expiry. 83 // When incorporating a new collection C, with reference height R, we enforce 84 // that it contains only transactions with reference heights in [R,R+E). 85 // * if we are building C: 86 // * we don't build expired collections (ie. our local finalized consensus height is at most R+E-1) 87 // * we don't include transactions referencing un-finalized blocks 88 // * therefore, C will contain only transactions with reference heights in [R,R+E) 89 // * if we are validating C: 90 // * honest validators only consider C valid if all its transactions have reference heights in [R,R+E) 91 // 92 // Therefore, to check for duplicates, we only need a lookup for transactions in collection 93 // with expiry windows that overlap with our collection under construction. 94 // 95 // A collection with overlapping expiry window can be finalized or un-finalized. 96 // * to find all non-expired and finalized collections, we make use of an index 97 // (main_chain_finalized_height -> cluster_block_ids with respective reference height), to search for a range of main chain heights // which could be only referenced by collections with overlapping expiry windows. 98 // * to find all overlapping and un-finalized collections, we can't use the above index, because it's 99 // only for finalized collections. Instead, we simply traverse along the chain up to the last 100 // finalized block. This could possibly include some collections with expiry windows that DON'T 101 // overlap with our collection under construction, but it is unlikely and doesn't impact correctness. 102 // 103 // After combining both the finalized and un-finalized cluster blocks that overlap with our expiry window, 104 // we can iterate through their transactions, and build a lookup for excluding duplicated transactions. 105 err := b.db.View(func(btx *badger.Txn) error { 106 107 // TODO (ramtin): enable this again 108 // b.tracer.StartSpan(parentID, trace.COLBuildOnSetup) 109 // defer b.tracer.FinishSpan(parentID, trace.COLBuildOnSetup) 110 111 err := operation.RetrieveHeader(parentID, &parent)(btx) 112 if err != nil { 113 return fmt.Errorf("could not retrieve parent: %w", err) 114 } 115 116 // retrieve the height and ID of the latest finalized block ON THE MAIN CHAIN 117 // this is used as the reference point for transaction expiry 118 err = operation.RetrieveFinalizedHeight(&refChainFinalizedHeight)(btx) 119 if err != nil { 120 return fmt.Errorf("could not retrieve main finalized height: %w", err) 121 } 122 err = operation.LookupBlockHeight(refChainFinalizedHeight, &refChainFinalizedID)(btx) 123 if err != nil { 124 return fmt.Errorf("could not retrieve main finalized ID: %w", err) 125 } 126 127 // retrieve the finalized boundary ON THE CLUSTER CHAIN 128 err = procedure.RetrieveLatestFinalizedClusterHeader(parent.ChainID, &clusterChainFinalizedBlock)(btx) 129 if err != nil { 130 return fmt.Errorf("could not retrieve cluster final: %w", err) 131 } 132 return nil 133 }) 134 if err != nil { 135 return nil, err 136 } 137 138 // pre-compute the minimum possible reference block height for transactions 139 // included in this collection (actual reference height may be greater) 140 minPossibleRefHeight := refChainFinalizedHeight - uint64(flow.DefaultTransactionExpiry-b.config.ExpiryBuffer) 141 if minPossibleRefHeight > refChainFinalizedHeight { 142 minPossibleRefHeight = 0 // overflow check 143 } 144 145 log := b.log.With(). 146 Hex("parent_id", parentID[:]). 147 Str("chain_id", parent.ChainID.String()). 148 Uint64("final_ref_height", refChainFinalizedHeight). 149 Logger() 150 151 log.Debug().Msg("building new cluster block") 152 153 // TODO (ramtin): enable this again 154 // b.tracer.FinishSpan(parentID, trace.COLBuildOnSetup) 155 // b.tracer.StartSpan(parentID, trace.COLBuildOnUnfinalizedLookup) 156 // defer b.tracer.FinishSpan(parentID, trace.COLBuildOnUnfinalizedLookup) 157 158 // STEP TWO: create a lookup of all previously used transactions on the 159 // part of the chain we care about. We do this separately for 160 // un-finalized and finalized sections of the chain to decide whether to 161 // remove conflicting transactions from the mempool. 162 163 // keep track of transactions in the ancestry to avoid duplicates 164 lookup := newTransactionLookup() 165 // keep track of transactions to enforce rate limiting 166 limiter := newRateLimiter(b.config, parent.Height+1) 167 168 // RATE LIMITING: the builder module can be configured to limit the 169 // rate at which transactions with a common payer are included in 170 // blocks. Depending on the configured limit, we either allow 1 171 // transaction every N sequential collections, or we allow K transactions 172 // per collection. 173 174 // first, look up previously included transactions in UN-FINALIZED ancestors 175 err = b.populateUnfinalizedAncestryLookup(parentID, clusterChainFinalizedBlock.Height, lookup, limiter) 176 if err != nil { 177 return nil, fmt.Errorf("could not populate un-finalized ancestry lookout (parent_id=%x): %w", parentID, err) 178 } 179 180 // TODO (ramtin): enable this again 181 // b.tracer.FinishSpan(parentID, trace.COLBuildOnUnfinalizedLookup) 182 // b.tracer.StartSpan(parentID, trace.COLBuildOnFinalizedLookup) 183 // defer b.tracer.FinishSpan(parentID, trace.COLBuildOnFinalizedLookup) 184 185 // second, look up previously included transactions in FINALIZED ancestors 186 err = b.populateFinalizedAncestryLookup(minPossibleRefHeight, refChainFinalizedHeight, lookup, limiter) 187 if err != nil { 188 return nil, fmt.Errorf("could not populate finalized ancestry lookup: %w", err) 189 } 190 191 // TODO (ramtin): enable this again 192 // b.tracer.FinishSpan(parentID, trace.COLBuildOnFinalizedLookup) 193 // b.tracer.StartSpan(parentID, trace.COLBuildOnCreatePayload) 194 // defer b.tracer.FinishSpan(parentID, trace.COLBuildOnCreatePayload) 195 196 // STEP THREE: build a payload of valid transactions, while at the same 197 // time figuring out the correct reference block ID for the collection. 198 199 // keep track of the actual smallest reference height of all included transactions 200 minRefHeight := uint64(math.MaxUint64) 201 minRefID := refChainFinalizedID 202 203 var transactions []*flow.TransactionBody 204 var totalByteSize uint64 205 var totalGas uint64 206 for _, tx := range b.transactions.All() { 207 208 // if we have reached maximum number of transactions, stop 209 if uint(len(transactions)) >= b.config.MaxCollectionSize { 210 break 211 } 212 213 txByteSize := uint64(tx.ByteSize()) 214 // ignore transactions with tx byte size bigger that the max amount per collection 215 // this case shouldn't happen ever since we keep a limit on tx byte size but in case 216 // we keep this condition 217 if txByteSize > b.config.MaxCollectionByteSize { 218 continue 219 } 220 221 // because the max byte size per tx is way smaller than the max collection byte size, we can stop here and not continue. 222 // to make it more effective in the future we can continue adding smaller ones 223 if totalByteSize+txByteSize > b.config.MaxCollectionByteSize { 224 break 225 } 226 227 // ignore transactions with max gas bigger that the max total gas per collection 228 // this case shouldn't happen ever but in case we keep this condition 229 if tx.GasLimit > b.config.MaxCollectionTotalGas { 230 continue 231 } 232 233 // cause the max gas limit per tx is way smaller than the total max gas per collection, we can stop here and not continue. 234 // to make it more effective in the future we can continue adding smaller ones 235 if totalGas+tx.GasLimit > b.config.MaxCollectionTotalGas { 236 break 237 } 238 239 // retrieve the main chain header that was used as reference 240 refHeader, err := b.mainHeaders.ByBlockID(tx.ReferenceBlockID) 241 if errors.Is(err, storage.ErrNotFound) { 242 continue // in case we are configured with liberal transaction ingest rules 243 } 244 if err != nil { 245 return nil, fmt.Errorf("could not retrieve reference header: %w", err) 246 } 247 248 // disallow un-finalized reference blocks 249 if refChainFinalizedHeight < refHeader.Height { 250 continue 251 } 252 // make sure the reference block is finalized and not orphaned 253 blockFinalizedAtReferenceHeight, err := b.mainHeaders.ByHeight(refHeader.Height) 254 if err != nil { 255 return nil, fmt.Errorf("could not check that reference block (id=%x) is finalized: %w", tx.ReferenceBlockID, err) 256 } 257 if blockFinalizedAtReferenceHeight.ID() != tx.ReferenceBlockID { 258 // the transaction references an orphaned block - it will never be valid 259 b.transactions.Remove(tx.ID()) 260 continue 261 } 262 263 // ensure the reference block is not too old 264 if refHeader.Height < minPossibleRefHeight { 265 // the transaction is expired, it will never be valid 266 b.transactions.Remove(tx.ID()) 267 continue 268 } 269 270 txID := tx.ID() 271 // check that the transaction was not already used in un-finalized history 272 if lookup.isUnfinalizedAncestor(txID) { 273 continue 274 } 275 276 // check that the transaction was not already included in finalized history. 277 if lookup.isFinalizedAncestor(txID) { 278 // remove from mempool, conflicts with finalized block will never be valid 279 b.transactions.Remove(txID) 280 continue 281 } 282 283 // enforce rate limiting rules 284 if limiter.shouldRateLimit(tx) { 285 if b.config.DryRunRateLimit { 286 // log that this transaction would have been rate-limited, but we will still include it in the collection 287 b.log.Info(). 288 Hex("tx_id", logging.ID(txID)). 289 Str("payer_addr", tx.Payer.String()). 290 Float64("rate_limit", b.config.MaxPayerTransactionRate). 291 Msg("dry-run: observed transaction that would have been rate limited") 292 } else { 293 b.log.Debug(). 294 Hex("tx_id", logging.ID(txID)). 295 Str("payer_addr", tx.Payer.String()). 296 Float64("rate_limit", b.config.MaxPayerTransactionRate). 297 Msg("transaction is rate-limited") 298 continue 299 } 300 } 301 302 // ensure we find the lowest reference block height 303 if refHeader.Height < minRefHeight { 304 minRefHeight = refHeader.Height 305 minRefID = tx.ReferenceBlockID 306 } 307 308 // update per-payer transaction count 309 limiter.transactionIncluded(tx) 310 311 transactions = append(transactions, tx) 312 totalByteSize += txByteSize 313 totalGas += tx.GasLimit 314 } 315 316 // STEP FOUR: we have a set of transactions that are valid to include 317 // on this fork. Now we need to create the collection that will be 318 // used in the payload and construct the final proposal model 319 // TODO (ramtin): enable this again 320 // b.tracer.FinishSpan(parentID, trace.COLBuildOnCreatePayload) 321 // b.tracer.StartSpan(parentID, trace.COLBuildOnCreateHeader) 322 // defer b.tracer.FinishSpan(parentID, trace.COLBuildOnCreateHeader) 323 324 // build the payload from the transactions 325 payload := cluster.PayloadFromTransactions(minRefID, transactions...) 326 327 header := &flow.Header{ 328 ChainID: parent.ChainID, 329 ParentID: parentID, 330 Height: parent.Height + 1, 331 PayloadHash: payload.Hash(), 332 Timestamp: time.Now().UTC(), 333 334 // NOTE: we rely on the HotStuff-provided setter to set the other 335 // fields, which are related to signatures and HotStuff internals 336 } 337 338 // set fields specific to the consensus algorithm 339 err = setter(header) 340 if err != nil { 341 return nil, fmt.Errorf("could not set fields to header: %w", err) 342 } 343 344 proposal = cluster.Block{ 345 Header: header, 346 Payload: &payload, 347 } 348 349 // TODO (ramtin): enable this again 350 // b.tracer.FinishSpan(parentID, trace.COLBuildOnCreateHeader) 351 352 span, ctx := b.tracer.StartCollectionSpan(context.Background(), proposal.ID(), trace.COLBuildOn, otelTrace.WithTimestamp(startTime)) 353 defer span.End() 354 355 dbInsertSpan, _ := b.tracer.StartSpanFromContext(ctx, trace.COLBuildOnDBInsert) 356 defer dbInsertSpan.End() 357 358 // finally we insert the block in a write transaction 359 err = operation.RetryOnConflict(b.db.Update, procedure.InsertClusterBlock(&proposal)) 360 if err != nil { 361 return nil, fmt.Errorf("could not insert built block: %w", err) 362 } 363 364 return proposal.Header, nil 365 } 366 367 // populateUnfinalizedAncestryLookup traverses the unfinalized ancestry backward 368 // to populate the transaction lookup (used for deduplication) and the rate limiter 369 // (used to limit transaction submission by payer). 370 // 371 // The traversal begins with the block specified by parentID (the block we are 372 // building on top of) and ends with the oldest unfinalized block in the ancestry. 373 func (b *Builder) populateUnfinalizedAncestryLookup(parentID flow.Identifier, finalHeight uint64, lookup *transactionLookup, limiter *rateLimiter) error { 374 375 err := fork.TraverseBackward(b.clusterHeaders, parentID, func(ancestor *flow.Header) error { 376 payload, err := b.payloads.ByBlockID(ancestor.ID()) 377 if err != nil { 378 return fmt.Errorf("could not retrieve ancestor payload: %w", err) 379 } 380 381 for _, tx := range payload.Collection.Transactions { 382 lookup.addUnfinalizedAncestor(tx.ID()) 383 limiter.addAncestor(ancestor.Height, tx) 384 } 385 return nil 386 }, fork.ExcludingHeight(finalHeight)) 387 388 return err 389 } 390 391 // populateFinalizedAncestryLookup traverses the reference block height index to 392 // populate the transaction lookup (used for deduplication) and the rate limiter 393 // (used to limit transaction submission by payer). 394 // 395 // The traversal is structured so that we check every collection whose reference 396 // block height translates to a possible constituent transaction which could also 397 // appear in the collection we are building. 398 func (b *Builder) populateFinalizedAncestryLookup(minRefHeight, maxRefHeight uint64, lookup *transactionLookup, limiter *rateLimiter) error { 399 400 // Let E be the global transaction expiry constant, measured in blocks. For each 401 // T ∈ `includedTransactions`, we have to decide whether the transaction 402 // already appeared in _any_ finalized cluster block. 403 // Notation: 404 // - consider a valid cluster block C and let c be its reference block height 405 // - consider a transaction T ∈ `includedTransactions` and let t denote its 406 // reference block height 407 // 408 // Boundary conditions: 409 // 1. C's reference block height is equal to the lowest reference block height of 410 // all its constituent transactions. Hence, for collection C to potentially contain T, it must satisfy c <= t. 411 // 2. For T to be eligible for inclusion in collection C, _none_ of the transactions within C are allowed 412 // to be expired w.r.t. C's reference block. Hence, for collection C to potentially contain T, it must satisfy t < c + E. 413 // 414 // Therefore, for collection C to potentially contain transaction T, it must satisfy t - E < c <= t. 415 // In other words, we only need to inspect collections with reference block height c ∈ (t-E, t]. 416 // Consequently, for a set of transactions, with `minRefHeight` (`maxRefHeight`) being the smallest (largest) 417 // reference block height, we only need to inspect collections with c ∈ (minRefHeight-E, maxRefHeight]. 418 419 // the finalized cluster blocks which could possibly contain any conflicting transactions 420 var clusterBlockIDs []flow.Identifier 421 start, end := findRefHeightSearchRangeForConflictingClusterBlocks(minRefHeight, maxRefHeight) 422 err := b.db.View(operation.LookupClusterBlocksByReferenceHeightRange(start, end, &clusterBlockIDs)) 423 if err != nil { 424 return fmt.Errorf("could not lookup finalized cluster blocks by reference height range [%d,%d]: %w", start, end, err) 425 } 426 427 for _, blockID := range clusterBlockIDs { 428 header, err := b.clusterHeaders.ByBlockID(blockID) 429 if err != nil { 430 return fmt.Errorf("could not retrieve cluster header (id=%x): %w", blockID, err) 431 } 432 payload, err := b.payloads.ByBlockID(blockID) 433 if err != nil { 434 return fmt.Errorf("could not retrieve cluster payload (block_id=%x): %w", blockID, err) 435 } 436 for _, tx := range payload.Collection.Transactions { 437 lookup.addFinalizedAncestor(tx.ID()) 438 limiter.addAncestor(header.Height, tx) 439 } 440 } 441 442 return nil 443 } 444 445 // findRefHeightSearchRangeForConflictingClusterBlocks computes the range of reference 446 // block heights of ancestor blocks which could possibly contain transactions 447 // duplicating those in our collection under construction, based on the range of 448 // reference heights of transactions in the collection under construction. 449 // 450 // Input range is the (inclusive) range of reference heights of transactions included 451 // in the collection under construction. Output range is the (inclusive) range of 452 // reference heights which need to be searched. 453 func findRefHeightSearchRangeForConflictingClusterBlocks(minRefHeight, maxRefHeight uint64) (start, end uint64) { 454 start = minRefHeight - flow.DefaultTransactionExpiry + 1 455 if start > minRefHeight { 456 start = 0 // overflow check 457 } 458 end = maxRefHeight 459 return start, end 460 }