github.com/ethereum/go-ethereum@v1.16.1/core/rawdb/chain_iterator.go (about)

     1  // Copyright 2020 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package rawdb
    18  
    19  import (
    20  	"encoding/binary"
    21  	"runtime"
    22  	"sync/atomic"
    23  	"time"
    24  
    25  	"github.com/ethereum/go-ethereum/common"
    26  	"github.com/ethereum/go-ethereum/common/prque"
    27  	"github.com/ethereum/go-ethereum/core/types"
    28  	"github.com/ethereum/go-ethereum/ethdb"
    29  	"github.com/ethereum/go-ethereum/log"
    30  	"github.com/ethereum/go-ethereum/rlp"
    31  )
    32  
    33  // InitDatabaseFromFreezer reinitializes an empty database from a previous batch
    34  // of frozen ancient blocks. The method iterates over all the frozen blocks and
    35  // injects into the database the block hash->number mappings.
    36  func InitDatabaseFromFreezer(db ethdb.Database) {
    37  	// If we can't access the freezer or it's empty, abort
    38  	frozen, err := db.Ancients()
    39  	if err != nil || frozen == 0 {
    40  		return
    41  	}
    42  	var (
    43  		batch  = db.NewBatch()
    44  		start  = time.Now()
    45  		logged = start.Add(-7 * time.Second) // Unindex during import is fast, don't double log
    46  		hash   common.Hash
    47  	)
    48  	for i := uint64(0); i < frozen; {
    49  		// We read 100K hashes at a time, for a total of 3.2M
    50  		count := uint64(100_000)
    51  		if i+count > frozen {
    52  			count = frozen - i
    53  		}
    54  		data, err := db.AncientRange(ChainFreezerHashTable, i, count, 32*count)
    55  		if err != nil {
    56  			log.Crit("Failed to init database from freezer", "err", err)
    57  		}
    58  		for j, h := range data {
    59  			number := i + uint64(j)
    60  			hash = common.BytesToHash(h)
    61  			WriteHeaderNumber(batch, hash, number)
    62  			// If enough data was accumulated in memory or we're at the last block, dump to disk
    63  			if batch.ValueSize() > ethdb.IdealBatchSize {
    64  				if err := batch.Write(); err != nil {
    65  					log.Crit("Failed to write data to db", "err", err)
    66  				}
    67  				batch.Reset()
    68  			}
    69  		}
    70  		i += uint64(len(data))
    71  		// If we've spent too much time already, notify the user of what we're doing
    72  		if time.Since(logged) > 8*time.Second {
    73  			log.Info("Initializing database from freezer", "total", frozen, "number", i, "hash", hash, "elapsed", common.PrettyDuration(time.Since(start)))
    74  			logged = time.Now()
    75  		}
    76  	}
    77  	if err := batch.Write(); err != nil {
    78  		log.Crit("Failed to write data to db", "err", err)
    79  	}
    80  	batch.Reset()
    81  
    82  	WriteHeadHeaderHash(db, hash)
    83  	WriteHeadFastBlockHash(db, hash)
    84  	log.Info("Initialized database from freezer", "blocks", frozen, "elapsed", common.PrettyDuration(time.Since(start)))
    85  }
    86  
    87  type blockTxHashes struct {
    88  	number uint64
    89  	hashes []common.Hash
    90  }
    91  
    92  // iterateTransactions iterates over all transactions in the (canon) block
    93  // number(s) given, and yields the hashes on a channel. If there is a signal
    94  // received from interrupt channel, the iteration will be aborted and result
    95  // channel will be closed.
    96  func iterateTransactions(db ethdb.Database, from uint64, to uint64, reverse bool, interrupt chan struct{}) chan *blockTxHashes {
    97  	// One thread sequentially reads data from db
    98  	type numberRlp struct {
    99  		number uint64
   100  		rlp    rlp.RawValue
   101  	}
   102  	if to == from {
   103  		return nil
   104  	}
   105  	threads := to - from
   106  	if cpus := runtime.NumCPU(); threads > uint64(cpus) {
   107  		threads = uint64(cpus)
   108  	}
   109  	var (
   110  		rlpCh    = make(chan *numberRlp, threads*2)     // we send raw rlp over this channel
   111  		hashesCh = make(chan *blockTxHashes, threads*2) // send hashes over hashesCh
   112  	)
   113  	// lookup runs in one instance
   114  	lookup := func() {
   115  		n, end := from, to
   116  		if reverse {
   117  			n, end = to-1, from-1
   118  		}
   119  		defer close(rlpCh)
   120  		for n != end {
   121  			data := ReadCanonicalBodyRLP(db, n, nil)
   122  			// Feed the block to the aggregator, or abort on interrupt
   123  			select {
   124  			case rlpCh <- &numberRlp{n, data}:
   125  			case <-interrupt:
   126  				return
   127  			}
   128  			if reverse {
   129  				n--
   130  			} else {
   131  				n++
   132  			}
   133  		}
   134  	}
   135  	// process runs in parallel
   136  	var nThreadsAlive atomic.Int32
   137  	nThreadsAlive.Store(int32(threads))
   138  	process := func() {
   139  		defer func() {
   140  			// Last processor closes the result channel
   141  			if nThreadsAlive.Add(-1) == 0 {
   142  				close(hashesCh)
   143  			}
   144  		}()
   145  		for data := range rlpCh {
   146  			var body types.Body
   147  			if err := rlp.DecodeBytes(data.rlp, &body); err != nil {
   148  				log.Warn("Failed to decode block body", "block", data.number, "error", err)
   149  				return
   150  			}
   151  			var hashes []common.Hash
   152  			for _, tx := range body.Transactions {
   153  				hashes = append(hashes, tx.Hash())
   154  			}
   155  			result := &blockTxHashes{
   156  				hashes: hashes,
   157  				number: data.number,
   158  			}
   159  			// Feed the block to the aggregator, or abort on interrupt
   160  			select {
   161  			case hashesCh <- result:
   162  			case <-interrupt:
   163  				return
   164  			}
   165  		}
   166  	}
   167  	go lookup() // start the sequential db accessor
   168  	for i := 0; i < int(threads); i++ {
   169  		go process()
   170  	}
   171  	return hashesCh
   172  }
   173  
   174  // indexTransactions creates txlookup indices of the specified block range.
   175  //
   176  // This function iterates canonical chain in reverse order, it has one main advantage:
   177  // We can write tx index tail flag periodically even without the whole indexing
   178  // procedure is finished. So that we can resume indexing procedure next time quickly.
   179  //
   180  // There is a passed channel, the whole procedure will be interrupted if any
   181  // signal received.
   182  func indexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool, report bool) {
   183  	// short circuit for invalid range
   184  	if from >= to {
   185  		return
   186  	}
   187  	var (
   188  		hashesCh = iterateTransactions(db, from, to, true, interrupt)
   189  		batch    = db.NewBatch()
   190  		start    = time.Now()
   191  		logged   = start.Add(-7 * time.Second)
   192  
   193  		// Since we iterate in reverse, we expect the first number to come
   194  		// in to be [to-1]. Therefore, setting lastNum to means that the
   195  		// queue gap-evaluation will work correctly
   196  		lastNum     = to
   197  		queue       = prque.New[int64, *blockTxHashes](nil)
   198  		blocks, txs = 0, 0 // for stats reporting
   199  	)
   200  	for chanDelivery := range hashesCh {
   201  		// Push the delivery into the queue and process contiguous ranges.
   202  		// Since we iterate in reverse, so lower numbers have lower prio, and
   203  		// we can use the number directly as prio marker
   204  		queue.Push(chanDelivery, int64(chanDelivery.number))
   205  		for !queue.Empty() {
   206  			// If the next available item is gapped, return
   207  			if _, priority := queue.Peek(); priority != int64(lastNum-1) {
   208  				break
   209  			}
   210  			// For testing
   211  			if hook != nil && !hook(lastNum-1) {
   212  				break
   213  			}
   214  			// Next block available, pop it off and index it
   215  			delivery := queue.PopItem()
   216  			lastNum = delivery.number
   217  			WriteTxLookupEntries(batch, delivery.number, delivery.hashes)
   218  			blocks++
   219  			txs += len(delivery.hashes)
   220  			// If enough data was accumulated in memory or we're at the last block, dump to disk
   221  			if batch.ValueSize() > ethdb.IdealBatchSize {
   222  				WriteTxIndexTail(batch, lastNum) // Also write the tail here
   223  				if err := batch.Write(); err != nil {
   224  					log.Crit("Failed writing batch to db", "error", err)
   225  					return
   226  				}
   227  				batch.Reset()
   228  			}
   229  			// If we've spent too much time already, notify the user of what we're doing
   230  			if time.Since(logged) > 8*time.Second {
   231  				log.Info("Indexing transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start)))
   232  				logged = time.Now()
   233  			}
   234  		}
   235  	}
   236  	// Flush the new indexing tail and the last committed data. It can also happen
   237  	// that the last batch is empty because nothing to index, but the tail has to
   238  	// be flushed anyway.
   239  	WriteTxIndexTail(batch, lastNum)
   240  	if err := batch.Write(); err != nil {
   241  		log.Crit("Failed writing batch to db", "error", err)
   242  		return
   243  	}
   244  	logger := log.Debug
   245  	if report {
   246  		logger = log.Info
   247  	}
   248  	select {
   249  	case <-interrupt:
   250  		logger("Transaction indexing interrupted", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start)))
   251  	default:
   252  		logger("Indexed transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start)))
   253  	}
   254  }
   255  
   256  // IndexTransactions creates txlookup indices of the specified block range. The from
   257  // is included while to is excluded.
   258  //
   259  // This function iterates canonical chain in reverse order, it has one main advantage:
   260  // We can write tx index tail flag periodically even without the whole indexing
   261  // procedure is finished. So that we can resume indexing procedure next time quickly.
   262  //
   263  // There is a passed channel, the whole procedure will be interrupted if any
   264  // signal received.
   265  func IndexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, report bool) {
   266  	indexTransactions(db, from, to, interrupt, nil, report)
   267  }
   268  
   269  // indexTransactionsForTesting is the internal debug version with an additional hook.
   270  func indexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
   271  	indexTransactions(db, from, to, interrupt, hook, false)
   272  }
   273  
   274  // unindexTransactions removes txlookup indices of the specified block range.
   275  //
   276  // There is a passed channel, the whole procedure will be interrupted if any
   277  // signal received.
   278  func unindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool, report bool) {
   279  	// short circuit for invalid range
   280  	if from >= to {
   281  		return
   282  	}
   283  	var (
   284  		hashesCh = iterateTransactions(db, from, to, false, interrupt)
   285  		batch    = db.NewBatch()
   286  		start    = time.Now()
   287  		logged   = start.Add(-7 * time.Second)
   288  
   289  		// we expect the first number to come in to be [from]. Therefore, setting
   290  		// nextNum to from means that the queue gap-evaluation will work correctly
   291  		nextNum     = from
   292  		queue       = prque.New[int64, *blockTxHashes](nil)
   293  		blocks, txs = 0, 0 // for stats reporting
   294  	)
   295  	// Otherwise spin up the concurrent iterator and unindexer
   296  	for delivery := range hashesCh {
   297  		// Push the delivery into the queue and process contiguous ranges.
   298  		queue.Push(delivery, -int64(delivery.number))
   299  		for !queue.Empty() {
   300  			// If the next available item is gapped, return
   301  			if _, priority := queue.Peek(); -priority != int64(nextNum) {
   302  				break
   303  			}
   304  			// For testing
   305  			if hook != nil && !hook(nextNum) {
   306  				break
   307  			}
   308  			delivery := queue.PopItem()
   309  			nextNum = delivery.number + 1
   310  			DeleteTxLookupEntries(batch, delivery.hashes)
   311  			txs += len(delivery.hashes)
   312  			blocks++
   313  
   314  			// If enough data was accumulated in memory or we're at the last block, dump to disk
   315  			// A batch counts the size of deletion as '1', so we need to flush more
   316  			// often than that.
   317  			if blocks%1000 == 0 {
   318  				WriteTxIndexTail(batch, nextNum)
   319  				if err := batch.Write(); err != nil {
   320  					log.Crit("Failed writing batch to db", "error", err)
   321  					return
   322  				}
   323  				batch.Reset()
   324  			}
   325  			// If we've spent too much time already, notify the user of what we're doing
   326  			if time.Since(logged) > 8*time.Second {
   327  				log.Info("Unindexing transactions", "blocks", blocks, "txs", txs, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start)))
   328  				logged = time.Now()
   329  			}
   330  		}
   331  	}
   332  	// Flush the new indexing tail and the last committed data. It can also happen
   333  	// that the last batch is empty because nothing to unindex, but the tail has to
   334  	// be flushed anyway.
   335  	WriteTxIndexTail(batch, nextNum)
   336  	if err := batch.Write(); err != nil {
   337  		log.Crit("Failed writing batch to db", "error", err)
   338  		return
   339  	}
   340  	logger := log.Debug
   341  	if report {
   342  		logger = log.Info
   343  	}
   344  	select {
   345  	case <-interrupt:
   346  		logger("Transaction unindexing interrupted", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start)))
   347  	default:
   348  		logger("Unindexed transactions", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start)))
   349  	}
   350  }
   351  
   352  // UnindexTransactions removes txlookup indices of the specified block range.
   353  // The from is included while to is excluded.
   354  //
   355  // There is a passed channel, the whole procedure will be interrupted if any
   356  // signal received.
   357  func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, report bool) {
   358  	unindexTransactions(db, from, to, interrupt, nil, report)
   359  }
   360  
   361  // unindexTransactionsForTesting is the internal debug version with an additional hook.
   362  func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
   363  	unindexTransactions(db, from, to, interrupt, hook, false)
   364  }
   365  
   366  // PruneTransactionIndex removes all tx index entries below a certain block number.
   367  func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) {
   368  	tail := ReadTxIndexTail(db)
   369  	if tail == nil || *tail > pruneBlock {
   370  		return // no index, or index ends above pruneBlock
   371  	}
   372  	// There are blocks below pruneBlock in the index. Iterate the entire index to remove
   373  	// their entries. Note if this fails, the index is messed up, but tail still points to
   374  	// the old tail.
   375  	var count, removed int
   376  	DeleteAllTxLookupEntries(db, func(txhash common.Hash, v []byte) bool {
   377  		count++
   378  		if count%10000000 == 0 {
   379  			log.Info("Pruning tx index", "count", count, "removed", removed)
   380  		}
   381  		if len(v) > 8 {
   382  			log.Error("Skipping legacy tx index entry", "hash", txhash)
   383  			return false
   384  		}
   385  		bn := decodeNumber(v)
   386  		if bn < pruneBlock {
   387  			removed++
   388  			return true
   389  		}
   390  		return false
   391  	})
   392  	WriteTxIndexTail(db, pruneBlock)
   393  }
   394  
   395  func decodeNumber(b []byte) uint64 {
   396  	var numBuffer [8]byte
   397  	copy(numBuffer[8-len(b):], b)
   398  	return binary.BigEndian.Uint64(numBuffer[:])
   399  }