github.com/ethereum/go-ethereum@v1.16.1/core/txindexer.go (about)

     1  // Copyright 2024 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package core
    18  
    19  import (
    20  	"fmt"
    21  	"sync/atomic"
    22  
    23  	"github.com/ethereum/go-ethereum/common"
    24  	"github.com/ethereum/go-ethereum/core/rawdb"
    25  	"github.com/ethereum/go-ethereum/ethdb"
    26  	"github.com/ethereum/go-ethereum/log"
    27  )
    28  
    29  // TxIndexProgress is the struct describing the progress for transaction indexing.
    30  type TxIndexProgress struct {
    31  	Indexed   uint64 // number of blocks whose transactions are indexed
    32  	Remaining uint64 // number of blocks whose transactions are not indexed yet
    33  }
    34  
    35  // Done returns an indicator if the transaction indexing is finished.
    36  func (progress TxIndexProgress) Done() bool {
    37  	return progress.Remaining == 0
    38  }
    39  
    40  // txIndexer is the module responsible for maintaining transaction indexes
    41  // according to the configured indexing range by users.
    42  type txIndexer struct {
    43  	// limit is the maximum number of blocks from head whose tx indexes
    44  	// are reserved:
    45  	//  * 0: means the entire chain should be indexed
    46  	//  * N: means the latest N blocks [HEAD-N+1, HEAD] should be indexed
    47  	//       and all others shouldn't.
    48  	limit uint64
    49  
    50  	// The current head of blockchain for transaction indexing. This field
    51  	// is accessed by both the indexer and the indexing progress queries.
    52  	head atomic.Uint64
    53  
    54  	// The current tail of the indexed transactions, null indicates
    55  	// that no transactions have been indexed yet.
    56  	//
    57  	// This field is accessed by both the indexer and the indexing
    58  	// progress queries.
    59  	tail atomic.Pointer[uint64]
    60  
    61  	// cutoff denotes the block number before which the chain segment should
    62  	// be pruned and not available locally.
    63  	cutoff uint64
    64  	db     ethdb.Database
    65  	term   chan chan struct{}
    66  	closed chan struct{}
    67  }
    68  
    69  // newTxIndexer initializes the transaction indexer.
    70  func newTxIndexer(limit uint64, chain *BlockChain) *txIndexer {
    71  	cutoff, _ := chain.HistoryPruningCutoff()
    72  	indexer := &txIndexer{
    73  		limit:  limit,
    74  		cutoff: cutoff,
    75  		db:     chain.db,
    76  		term:   make(chan chan struct{}),
    77  		closed: make(chan struct{}),
    78  	}
    79  	indexer.head.Store(indexer.resolveHead())
    80  	indexer.tail.Store(rawdb.ReadTxIndexTail(chain.db))
    81  
    82  	go indexer.loop(chain)
    83  
    84  	var msg string
    85  	if limit == 0 {
    86  		if indexer.cutoff == 0 {
    87  			msg = "entire chain"
    88  		} else {
    89  			msg = fmt.Sprintf("blocks since #%d", indexer.cutoff)
    90  		}
    91  	} else {
    92  		msg = fmt.Sprintf("last %d blocks", limit)
    93  	}
    94  	log.Info("Initialized transaction indexer", "range", msg)
    95  
    96  	return indexer
    97  }
    98  
    99  // run executes the scheduled indexing/unindexing task in a separate thread.
   100  // If the stop channel is closed, the task should terminate as soon as possible.
   101  // The done channel will be closed once the task is complete.
   102  //
   103  // Existing transaction indexes are assumed to be valid, with both the head
   104  // and tail above the configured cutoff.
   105  func (indexer *txIndexer) run(head uint64, stop chan struct{}, done chan struct{}) {
   106  	defer func() { close(done) }()
   107  
   108  	// Short circuit if the chain is either empty, or entirely below the
   109  	// cutoff point.
   110  	if head == 0 || head < indexer.cutoff {
   111  		return
   112  	}
   113  	// The tail flag is not existent, it means the node is just initialized
   114  	// and all blocks in the chain (part of them may from ancient store) are
   115  	// not indexed yet, index the chain according to the configured limit.
   116  	tail := rawdb.ReadTxIndexTail(indexer.db)
   117  	if tail == nil {
   118  		// Determine the first block for transaction indexing, taking the
   119  		// configured cutoff point into account.
   120  		from := uint64(0)
   121  		if indexer.limit != 0 && head >= indexer.limit {
   122  			from = head - indexer.limit + 1
   123  		}
   124  		from = max(from, indexer.cutoff)
   125  		rawdb.IndexTransactions(indexer.db, from, head+1, stop, true)
   126  		return
   127  	}
   128  	// The tail flag is existent (which means indexes in [tail, head] should be
   129  	// present), while the whole chain are requested for indexing.
   130  	if indexer.limit == 0 || head < indexer.limit {
   131  		if *tail > 0 {
   132  			from := max(uint64(0), indexer.cutoff)
   133  			rawdb.IndexTransactions(indexer.db, from, *tail, stop, true)
   134  		}
   135  		return
   136  	}
   137  	// The tail flag is existent, adjust the index range according to configured
   138  	// limit and the latest chain head.
   139  	from := head - indexer.limit + 1
   140  	from = max(from, indexer.cutoff)
   141  	if from < *tail {
   142  		// Reindex a part of missing indices and rewind index tail to HEAD-limit
   143  		rawdb.IndexTransactions(indexer.db, from, *tail, stop, true)
   144  	} else {
   145  		// Unindex a part of stale indices and forward index tail to HEAD-limit
   146  		rawdb.UnindexTransactions(indexer.db, *tail, from, stop, false)
   147  	}
   148  }
   149  
   150  // repair ensures that transaction indexes are in a valid state and invalidates
   151  // them if they are not. The following cases are considered invalid:
   152  // * The index tail is higher than the chain head.
   153  // * The chain head is below the configured cutoff, but the index tail is not empty.
   154  // * The index tail is below the configured cutoff, but it is not empty.
   155  func (indexer *txIndexer) repair(head uint64) {
   156  	// If the transactions haven't been indexed yet, nothing to repair
   157  	tail := rawdb.ReadTxIndexTail(indexer.db)
   158  	if tail == nil {
   159  		return
   160  	}
   161  	// The transaction index tail is higher than the chain head, which may occur
   162  	// when the chain is rewound to a historical height below the index tail.
   163  	// Purge the transaction indexes from the database. **It's not a common case
   164  	// to rewind the chain head below the index tail**.
   165  	if *tail > head {
   166  		// A crash may occur between the two delete operations,
   167  		// potentially leaving dangling indexes in the database.
   168  		// However, this is considered acceptable.
   169  		indexer.tail.Store(nil)
   170  		rawdb.DeleteTxIndexTail(indexer.db)
   171  		rawdb.DeleteAllTxLookupEntries(indexer.db, nil)
   172  		log.Warn("Purge transaction indexes", "head", head, "tail", *tail)
   173  		return
   174  	}
   175  
   176  	// If the entire chain is below the configured cutoff point,
   177  	// removing the tail of transaction indexing and purges the
   178  	// transaction indexes. **It's not a common case, as the cutoff
   179  	// is usually defined below the chain head**.
   180  	if head < indexer.cutoff {
   181  		// A crash may occur between the two delete operations,
   182  		// potentially leaving dangling indexes in the database.
   183  		// However, this is considered acceptable.
   184  		//
   185  		// The leftover indexes can't be unindexed by scanning
   186  		// the blocks as they are not guaranteed to be available.
   187  		// Traversing the database directly within the transaction
   188  		// index namespace might be slow and expensive, but we
   189  		// have no choice.
   190  		indexer.tail.Store(nil)
   191  		rawdb.DeleteTxIndexTail(indexer.db)
   192  		rawdb.DeleteAllTxLookupEntries(indexer.db, nil)
   193  		log.Warn("Purge transaction indexes", "head", head, "cutoff", indexer.cutoff)
   194  		return
   195  	}
   196  
   197  	// The chain head is above the cutoff while the tail is below the
   198  	// cutoff. Shift the tail to the cutoff point and remove the indexes
   199  	// below.
   200  	if *tail < indexer.cutoff {
   201  		// A crash may occur between the two delete operations,
   202  		// potentially leaving dangling indexes in the database.
   203  		// However, this is considered acceptable.
   204  		indexer.tail.Store(&indexer.cutoff)
   205  		rawdb.WriteTxIndexTail(indexer.db, indexer.cutoff)
   206  		rawdb.DeleteAllTxLookupEntries(indexer.db, func(txhash common.Hash, blob []byte) bool {
   207  			n := rawdb.DecodeTxLookupEntry(blob, indexer.db)
   208  			return n != nil && *n < indexer.cutoff
   209  		})
   210  		log.Warn("Purge transaction indexes below cutoff", "tail", *tail, "cutoff", indexer.cutoff)
   211  	}
   212  }
   213  
   214  // resolveHead resolves the block number of the current chain head.
   215  func (indexer *txIndexer) resolveHead() uint64 {
   216  	headBlockHash := rawdb.ReadHeadBlockHash(indexer.db)
   217  	if headBlockHash == (common.Hash{}) {
   218  		return 0
   219  	}
   220  	headBlockNumber := rawdb.ReadHeaderNumber(indexer.db, headBlockHash)
   221  	if headBlockNumber == nil {
   222  		return 0
   223  	}
   224  	return *headBlockNumber
   225  }
   226  
   227  // loop is the scheduler of the indexer, assigning indexing/unindexing tasks depending
   228  // on the received chain event.
   229  func (indexer *txIndexer) loop(chain *BlockChain) {
   230  	defer close(indexer.closed)
   231  
   232  	// Listening to chain events and manipulate the transaction indexes.
   233  	var (
   234  		stop   chan struct{} // Non-nil if background routine is active
   235  		done   chan struct{} // Non-nil if background routine is active
   236  		headCh = make(chan ChainHeadEvent)
   237  		sub    = chain.SubscribeChainHeadEvent(headCh)
   238  	)
   239  	defer sub.Unsubscribe()
   240  
   241  	// Validate the transaction indexes and repair if necessary
   242  	head := indexer.head.Load()
   243  	indexer.repair(head)
   244  
   245  	// Launch the initial processing if chain is not empty (head != genesis).
   246  	// This step is useful in these scenarios that chain has no progress.
   247  	if head != 0 {
   248  		stop = make(chan struct{})
   249  		done = make(chan struct{})
   250  		go indexer.run(head, stop, done)
   251  	}
   252  	for {
   253  		select {
   254  		case h := <-headCh:
   255  			indexer.head.Store(h.Header.Number.Uint64())
   256  			if done == nil {
   257  				stop = make(chan struct{})
   258  				done = make(chan struct{})
   259  				go indexer.run(h.Header.Number.Uint64(), stop, done)
   260  			}
   261  
   262  		case <-done:
   263  			stop = nil
   264  			done = nil
   265  			indexer.tail.Store(rawdb.ReadTxIndexTail(indexer.db))
   266  
   267  		case ch := <-indexer.term:
   268  			if stop != nil {
   269  				close(stop)
   270  			}
   271  			if done != nil {
   272  				log.Info("Waiting background transaction indexer to exit")
   273  				<-done
   274  			}
   275  			close(ch)
   276  			return
   277  		}
   278  	}
   279  }
   280  
   281  // report returns the tx indexing progress.
   282  func (indexer *txIndexer) report(head uint64, tail *uint64) TxIndexProgress {
   283  	// Special case if the head is even below the cutoff,
   284  	// nothing to index.
   285  	if head < indexer.cutoff {
   286  		return TxIndexProgress{
   287  			Indexed:   0,
   288  			Remaining: 0,
   289  		}
   290  	}
   291  	// Compute how many blocks are supposed to be indexed
   292  	total := indexer.limit
   293  	if indexer.limit == 0 || total > head {
   294  		total = head + 1 // genesis included
   295  	}
   296  	length := head - indexer.cutoff + 1 // all available chain for indexing
   297  	if total > length {
   298  		total = length
   299  	}
   300  	// Compute how many blocks have been indexed
   301  	var indexed uint64
   302  	if tail != nil {
   303  		indexed = head - *tail + 1
   304  	}
   305  	// The value of indexed might be larger than total if some blocks need
   306  	// to be unindexed, avoiding a negative remaining.
   307  	var remaining uint64
   308  	if indexed < total {
   309  		remaining = total - indexed
   310  	}
   311  	return TxIndexProgress{
   312  		Indexed:   indexed,
   313  		Remaining: remaining,
   314  	}
   315  }
   316  
   317  // txIndexProgress retrieves the transaction indexing progress. The reported
   318  // progress may slightly lag behind the actual indexing state, as the tail is
   319  // only updated at the end of each indexing operation. However, this delay is
   320  // considered acceptable.
   321  func (indexer *txIndexer) txIndexProgress() TxIndexProgress {
   322  	return indexer.report(indexer.head.Load(), indexer.tail.Load())
   323  }
   324  
   325  // close shutdown the indexer. Safe to be called for multiple times.
   326  func (indexer *txIndexer) close() {
   327  	ch := make(chan struct{})
   328  	select {
   329  	case indexer.term <- ch:
   330  		<-ch
   331  	case <-indexer.closed:
   332  	}
   333  }