github.com/ethereum/go-ethereum@v1.14.3/core/rawdb/chain_iterator.go (about)

     1  // Copyright 2020 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package rawdb
    18  
    19  import (
    20  	"runtime"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	"github.com/ethereum/go-ethereum/common"
    25  	"github.com/ethereum/go-ethereum/common/prque"
    26  	"github.com/ethereum/go-ethereum/core/types"
    27  	"github.com/ethereum/go-ethereum/ethdb"
    28  	"github.com/ethereum/go-ethereum/log"
    29  	"github.com/ethereum/go-ethereum/rlp"
    30  )
    31  
    32  // InitDatabaseFromFreezer reinitializes an empty database from a previous batch
    33  // of frozen ancient blocks. The method iterates over all the frozen blocks and
    34  // injects into the database the block hash->number mappings.
    35  func InitDatabaseFromFreezer(db ethdb.Database) {
    36  	// If we can't access the freezer or it's empty, abort
    37  	frozen, err := db.Ancients()
    38  	if err != nil || frozen == 0 {
    39  		return
    40  	}
    41  	var (
    42  		batch  = db.NewBatch()
    43  		start  = time.Now()
    44  		logged = start.Add(-7 * time.Second) // Unindex during import is fast, don't double log
    45  		hash   common.Hash
    46  	)
    47  	for i := uint64(0); i < frozen; {
    48  		// We read 100K hashes at a time, for a total of 3.2M
    49  		count := uint64(100_000)
    50  		if i+count > frozen {
    51  			count = frozen - i
    52  		}
    53  		data, err := db.AncientRange(ChainFreezerHashTable, i, count, 32*count)
    54  		if err != nil {
    55  			log.Crit("Failed to init database from freezer", "err", err)
    56  		}
    57  		for j, h := range data {
    58  			number := i + uint64(j)
    59  			hash = common.BytesToHash(h)
    60  			WriteHeaderNumber(batch, hash, number)
    61  			// If enough data was accumulated in memory or we're at the last block, dump to disk
    62  			if batch.ValueSize() > ethdb.IdealBatchSize {
    63  				if err := batch.Write(); err != nil {
    64  					log.Crit("Failed to write data to db", "err", err)
    65  				}
    66  				batch.Reset()
    67  			}
    68  		}
    69  		i += uint64(len(data))
    70  		// If we've spent too much time already, notify the user of what we're doing
    71  		if time.Since(logged) > 8*time.Second {
    72  			log.Info("Initializing database from freezer", "total", frozen, "number", i, "hash", hash, "elapsed", common.PrettyDuration(time.Since(start)))
    73  			logged = time.Now()
    74  		}
    75  	}
    76  	if err := batch.Write(); err != nil {
    77  		log.Crit("Failed to write data to db", "err", err)
    78  	}
    79  	batch.Reset()
    80  
    81  	WriteHeadHeaderHash(db, hash)
    82  	WriteHeadFastBlockHash(db, hash)
    83  	log.Info("Initialized database from freezer", "blocks", frozen, "elapsed", common.PrettyDuration(time.Since(start)))
    84  }
    85  
// blockTxHashes is the unit of delivery produced by iterateTransactions: the
// transaction hashes of a single canonical block.
type blockTxHashes struct {
	number uint64        // block number the hashes belong to
	hashes []common.Hash // transaction hashes in block order
}
    90  
// iterateTransactions iterates over all transactions in the (canon) block
// number(s) given, and yields the hashes on a channel. If there is a signal
// received from interrupt channel, the iteration will be aborted and result
// channel will be closed.
//
// The range is half-open: [from, to). With reverse=true blocks are read from
// to-1 down to from, otherwise from from up to to-1. Deliveries on the
// returned channel may arrive out of number order, since body decoding is
// fanned out over multiple goroutines. Returns nil if the range is empty.
func iterateTransactions(db ethdb.Database, from uint64, to uint64, reverse bool, interrupt chan struct{}) chan *blockTxHashes {
	// One thread sequentially reads data from db
	type numberRlp struct {
		number uint64
		rlp    rlp.RawValue
	}
	if to == from {
		return nil
	}
	// Use one decoder goroutine per block, capped at the CPU count
	threads := to - from
	if cpus := runtime.NumCPU(); threads > uint64(cpus) {
		threads = uint64(cpus)
	}
	var (
		rlpCh    = make(chan *numberRlp, threads*2)     // we send raw rlp over this channel
		hashesCh = make(chan *blockTxHashes, threads*2) // send hashes over hashesCh
	)
	// lookup runs in one instance
	lookup := func() {
		// end is one step past the final block in iteration direction. For
		// reverse iteration with from == 0 it wraps to MaxUint64, which is
		// matched by n's own wraparound after block 0, so the loop terminates.
		n, end := from, to
		if reverse {
			n, end = to-1, from-1
		}
		defer close(rlpCh)
		for n != end {
			data := ReadCanonicalBodyRLP(db, n)
			// Feed the block to the aggregator, or abort on interrupt
			select {
			case rlpCh <- &numberRlp{n, data}:
			case <-interrupt:
				return
			}
			if reverse {
				n--
			} else {
				n++
			}
		}
	}
	// process runs in parallel
	var nThreadsAlive atomic.Int32
	nThreadsAlive.Store(int32(threads))
	process := func() {
		defer func() {
			// Last processor closes the result channel
			if nThreadsAlive.Add(-1) == 0 {
				close(hashesCh)
			}
		}()
		for data := range rlpCh {
			var body types.Body
			if err := rlp.DecodeBytes(data.rlp, &body); err != nil {
				// NOTE(review): on decode failure this worker exits early; if
				// every worker bails out, the lookup goroutine can only stop
				// via the interrupt channel — confirm callers always fire it.
				log.Warn("Failed to decode block body", "block", data.number, "error", err)
				return
			}
			// Only the transaction hashes are retained; the decoded bodies
			// are discarded after this point.
			var hashes []common.Hash
			for _, tx := range body.Transactions {
				hashes = append(hashes, tx.Hash())
			}
			result := &blockTxHashes{
				hashes: hashes,
				number: data.number,
			}
			// Feed the block to the aggregator, or abort on interrupt
			select {
			case hashesCh <- result:
			case <-interrupt:
				return
			}
		}
	}
	go lookup() // start the sequential db accessor
	for i := 0; i < int(threads); i++ {
		go process()
	}
	return hashesCh
}
   172  
// indexTransactions creates txlookup indices of the specified block range
// [from, to).
//
// This function iterates canonical chain in reverse order, it has one main advantage:
// We can write tx index tail flag periodically even without the whole indexing
// procedure is finished. So that we can resume indexing procedure next time quickly.
//
// There is a passed channel, the whole procedure will be interrupted if any
// signal received.
//
// The optional hook is invoked with the next block number before it is
// indexed and can pause the contiguous run by returning false; it exists
// purely for testing.
func indexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool, report bool) {
	// short circuit for invalid range
	if from >= to {
		return
	}
	var (
		hashesCh = iterateTransactions(db, from, to, true, interrupt)
		batch    = db.NewBatch()
		start    = time.Now()
		logged   = start.Add(-7 * time.Second)

		// Since we iterate in reverse, we expect the first number to come
		// in to be [to-1]. Therefore, setting lastNum to [to] means that the
		// queue gap-evaluation will work correctly
		lastNum     = to
		queue       = prque.New[int64, *blockTxHashes](nil)
		blocks, txs = 0, 0 // for stats reporting
	)
	// Deliveries arrive out of order; buffer them in a priority queue and only
	// commit blocks once they form a contiguous run ending at lastNum-1.
	for chanDelivery := range hashesCh {
		// Push the delivery into the queue and process contiguous ranges.
		// Since we iterate in reverse, so lower numbers have lower prio, and
		// we can use the number directly as prio marker
		queue.Push(chanDelivery, int64(chanDelivery.number))
		for !queue.Empty() {
			// If the next available item is gapped, return
			if _, priority := queue.Peek(); priority != int64(lastNum-1) {
				break
			}
			// For testing
			if hook != nil && !hook(lastNum-1) {
				break
			}
			// Next block available, pop it off and index it
			delivery := queue.PopItem()
			lastNum = delivery.number
			WriteTxLookupEntries(batch, delivery.number, delivery.hashes)
			blocks++
			txs += len(delivery.hashes)
			// If enough data was accumulated in memory or we're at the last block, dump to disk
			if batch.ValueSize() > ethdb.IdealBatchSize {
				WriteTxIndexTail(batch, lastNum) // Also write the tail here
				if err := batch.Write(); err != nil {
					log.Crit("Failed writing batch to db", "error", err)
					return
				}
				batch.Reset()
			}
			// If we've spent too much time already, notify the user of what we're doing
			if time.Since(logged) > 8*time.Second {
				log.Info("Indexing transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start)))
				logged = time.Now()
			}
		}
	}
	// Flush the new indexing tail and the last committed data. It can also happen
	// that the last batch is empty because nothing to index, but the tail has to
	// be flushed anyway.
	WriteTxIndexTail(batch, lastNum)
	if err := batch.Write(); err != nil {
		log.Crit("Failed writing batch to db", "error", err)
		return
	}
	// Summarize at info level only when explicitly requested, else at debug
	logger := log.Debug
	if report {
		logger = log.Info
	}
	select {
	case <-interrupt:
		logger("Transaction indexing interrupted", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start)))
	default:
		logger("Indexed transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start)))
	}
}
   254  
   255  // IndexTransactions creates txlookup indices of the specified block range. The from
   256  // is included while to is excluded.
   257  //
   258  // This function iterates canonical chain in reverse order, it has one main advantage:
   259  // We can write tx index tail flag periodically even without the whole indexing
   260  // procedure is finished. So that we can resume indexing procedure next time quickly.
   261  //
   262  // There is a passed channel, the whole procedure will be interrupted if any
   263  // signal received.
   264  func IndexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, report bool) {
   265  	indexTransactions(db, from, to, interrupt, nil, report)
   266  }
   267  
   268  // indexTransactionsForTesting is the internal debug version with an additional hook.
   269  func indexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
   270  	indexTransactions(db, from, to, interrupt, hook, false)
   271  }
   272  
// unindexTransactions removes txlookup indices of the specified block range
// [from, to).
//
// There is a passed channel, the whole procedure will be interrupted if any
// signal received.
//
// The optional hook is invoked with the next block number before it is
// unindexed and can pause the contiguous run by returning false; it exists
// purely for testing.
func unindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool, report bool) {
	// short circuit for invalid range
	if from >= to {
		return
	}
	var (
		hashesCh = iterateTransactions(db, from, to, false, interrupt)
		batch    = db.NewBatch()
		start    = time.Now()
		logged   = start.Add(-7 * time.Second)

		// we expect the first number to come in to be [from]. Therefore, setting
		// nextNum to from means that the queue gap-evaluation will work correctly
		nextNum     = from
		queue       = prque.New[int64, *blockTxHashes](nil)
		blocks, txs = 0, 0 // for stats reporting
	)
	// Otherwise spin up the concurrent iterator and unindexer
	for delivery := range hashesCh {
		// Push the delivery into the queue and process contiguous ranges.
		// Iteration is forward here, so the number is negated to make lower
		// block numbers pop first from the min-priority queue.
		queue.Push(delivery, -int64(delivery.number))
		for !queue.Empty() {
			// If the next available item is gapped, return
			if _, priority := queue.Peek(); -priority != int64(nextNum) {
				break
			}
			// For testing
			if hook != nil && !hook(nextNum) {
				break
			}
			delivery := queue.PopItem()
			nextNum = delivery.number + 1
			DeleteTxLookupEntries(batch, delivery.hashes)
			txs += len(delivery.hashes)
			blocks++

			// If enough data was accumulated in memory or we're at the last block, dump to disk
			// A batch counts the size of deletion as '1', so we need to flush more
			// often than that.
			if blocks%1000 == 0 {
				WriteTxIndexTail(batch, nextNum)
				if err := batch.Write(); err != nil {
					log.Crit("Failed writing batch to db", "error", err)
					return
				}
				batch.Reset()
			}
			// If we've spent too much time already, notify the user of what we're doing
			if time.Since(logged) > 8*time.Second {
				log.Info("Unindexing transactions", "blocks", blocks, "txs", txs, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start)))
				logged = time.Now()
			}
		}
	}
	// Flush the new indexing tail and the last committed data. It can also happen
	// that the last batch is empty because nothing to unindex, but the tail has to
	// be flushed anyway.
	WriteTxIndexTail(batch, nextNum)
	if err := batch.Write(); err != nil {
		log.Crit("Failed writing batch to db", "error", err)
		return
	}
	// Summarize at info level only when explicitly requested, else at debug
	logger := log.Debug
	if report {
		logger = log.Info
	}
	select {
	case <-interrupt:
		// NOTE(review): the persisted tail is nextNum, but both log lines
		// below report "tail" as to, which only matches on a completed run —
		// the interrupted message may overstate progress; confirm intent.
		logger("Transaction unindexing interrupted", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start)))
	default:
		logger("Unindexed transactions", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start)))
	}
}
   350  
   351  // UnindexTransactions removes txlookup indices of the specified block range.
   352  // The from is included while to is excluded.
   353  //
   354  // There is a passed channel, the whole procedure will be interrupted if any
   355  // signal received.
   356  func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, report bool) {
   357  	unindexTransactions(db, from, to, interrupt, nil, report)
   358  }
   359  
   360  // unindexTransactionsForTesting is the internal debug version with an additional hook.
   361  func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
   362  	unindexTransactions(db, from, to, interrupt, hook, false)
   363  }