github.com/ethw3/go-ethereuma@v0.0.0-20221013053120-c14602a4c23c/core/rawdb/chain_iterator.go (about)

     1  // Copyright 2020 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package rawdb
    18  
    19  import (
    20  	"runtime"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	"github.com/ethw3/go-ethereuma/common"
    25  	"github.com/ethw3/go-ethereuma/common/prque"
    26  	"github.com/ethw3/go-ethereuma/core/types"
    27  	"github.com/ethw3/go-ethereuma/ethdb"
    28  	"github.com/ethw3/go-ethereuma/log"
    29  	"github.com/ethw3/go-ethereuma/rlp"
    30  )
    31  
    32  // InitDatabaseFromFreezer reinitializes an empty database from a previous batch
    33  // of frozen ancient blocks. The method iterates over all the frozen blocks and
    34  // injects into the database the block hash->number mappings.
    35  func InitDatabaseFromFreezer(db ethdb.Database) {
    36  	// If we can't access the freezer or it's empty, abort
    37  	frozen, err := db.Ancients()
    38  	if err != nil || frozen == 0 {
    39  		return
    40  	}
    41  	var (
    42  		batch  = db.NewBatch()
    43  		start  = time.Now()
    44  		logged = start.Add(-7 * time.Second) // Unindex during import is fast, don't double log
    45  		hash   common.Hash
    46  	)
    47  	for i := uint64(0); i < frozen; {
    48  		// We read 100K hashes at a time, for a total of 3.2M
    49  		count := uint64(100_000)
    50  		if i+count > frozen {
    51  			count = frozen - i
    52  		}
    53  		data, err := db.AncientRange(chainFreezerHashTable, i, count, 32*count)
    54  		if err != nil {
    55  			log.Crit("Failed to init database from freezer", "err", err)
    56  		}
    57  		for j, h := range data {
    58  			number := i + uint64(j)
    59  			hash = common.BytesToHash(h)
    60  			WriteHeaderNumber(batch, hash, number)
    61  			// If enough data was accumulated in memory or we're at the last block, dump to disk
    62  			if batch.ValueSize() > ethdb.IdealBatchSize {
    63  				if err := batch.Write(); err != nil {
    64  					log.Crit("Failed to write data to db", "err", err)
    65  				}
    66  				batch.Reset()
    67  			}
    68  		}
    69  		i += uint64(len(data))
    70  		// If we've spent too much time already, notify the user of what we're doing
    71  		if time.Since(logged) > 8*time.Second {
    72  			log.Info("Initializing database from freezer", "total", frozen, "number", i, "hash", hash, "elapsed", common.PrettyDuration(time.Since(start)))
    73  			logged = time.Now()
    74  		}
    75  	}
    76  	if err := batch.Write(); err != nil {
    77  		log.Crit("Failed to write data to db", "err", err)
    78  	}
    79  	batch.Reset()
    80  
    81  	WriteHeadHeaderHash(db, hash)
    82  	WriteHeadFastBlockHash(db, hash)
    83  	log.Info("Initialized database from freezer", "blocks", frozen, "elapsed", common.PrettyDuration(time.Since(start)))
    84  }
    85  
// blockTxHashes bundles the transaction hashes of a single block together
// with that block's number. It is the unit delivered by iterateTransactions.
type blockTxHashes struct {
	number uint64        // number of the block the hashes belong to
	hashes []common.Hash // hashes of the block's transactions, in body order
}
    90  
    91  // iterateTransactions iterates over all transactions in the (canon) block
    92  // number(s) given, and yields the hashes on a channel. If there is a signal
    93  // received from interrupt channel, the iteration will be aborted and result
    94  // channel will be closed.
    95  func iterateTransactions(db ethdb.Database, from uint64, to uint64, reverse bool, interrupt chan struct{}) chan *blockTxHashes {
    96  	// One thread sequentially reads data from db
    97  	type numberRlp struct {
    98  		number uint64
    99  		rlp    rlp.RawValue
   100  	}
   101  	if to == from {
   102  		return nil
   103  	}
   104  	threads := to - from
   105  	if cpus := runtime.NumCPU(); threads > uint64(cpus) {
   106  		threads = uint64(cpus)
   107  	}
   108  	var (
   109  		rlpCh    = make(chan *numberRlp, threads*2)     // we send raw rlp over this channel
   110  		hashesCh = make(chan *blockTxHashes, threads*2) // send hashes over hashesCh
   111  	)
   112  	// lookup runs in one instance
   113  	lookup := func() {
   114  		n, end := from, to
   115  		if reverse {
   116  			n, end = to-1, from-1
   117  		}
   118  		defer close(rlpCh)
   119  		for n != end {
   120  			data := ReadCanonicalBodyRLP(db, n)
   121  			// Feed the block to the aggregator, or abort on interrupt
   122  			select {
   123  			case rlpCh <- &numberRlp{n, data}:
   124  			case <-interrupt:
   125  				return
   126  			}
   127  			if reverse {
   128  				n--
   129  			} else {
   130  				n++
   131  			}
   132  		}
   133  	}
   134  	// process runs in parallel
   135  	nThreadsAlive := int32(threads)
   136  	process := func() {
   137  		defer func() {
   138  			// Last processor closes the result channel
   139  			if atomic.AddInt32(&nThreadsAlive, -1) == 0 {
   140  				close(hashesCh)
   141  			}
   142  		}()
   143  		for data := range rlpCh {
   144  			var body types.Body
   145  			if err := rlp.DecodeBytes(data.rlp, &body); err != nil {
   146  				log.Warn("Failed to decode block body", "block", data.number, "error", err)
   147  				return
   148  			}
   149  			var hashes []common.Hash
   150  			for _, tx := range body.Transactions {
   151  				hashes = append(hashes, tx.Hash())
   152  			}
   153  			result := &blockTxHashes{
   154  				hashes: hashes,
   155  				number: data.number,
   156  			}
   157  			// Feed the block to the aggregator, or abort on interrupt
   158  			select {
   159  			case hashesCh <- result:
   160  			case <-interrupt:
   161  				return
   162  			}
   163  		}
   164  	}
   165  	go lookup() // start the sequential db accessor
   166  	for i := 0; i < int(threads); i++ {
   167  		go process()
   168  	}
   169  	return hashesCh
   170  }
   171  
   172  // indexTransactions creates txlookup indices of the specified block range.
   173  //
   174  // This function iterates canonical chain in reverse order, it has one main advantage:
   175  // We can write tx index tail flag periodically even without the whole indexing
   176  // procedure is finished. So that we can resume indexing procedure next time quickly.
   177  //
   178  // There is a passed channel, the whole procedure will be interrupted if any
   179  // signal received.
   180  func indexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
   181  	// short circuit for invalid range
   182  	if from >= to {
   183  		return
   184  	}
   185  	var (
   186  		hashesCh = iterateTransactions(db, from, to, true, interrupt)
   187  		batch    = db.NewBatch()
   188  		start    = time.Now()
   189  		logged   = start.Add(-7 * time.Second)
   190  		// Since we iterate in reverse, we expect the first number to come
   191  		// in to be [to-1]. Therefore, setting lastNum to means that the
   192  		// prqueue gap-evaluation will work correctly
   193  		lastNum = to
   194  		queue   = prque.New(nil)
   195  		// for stats reporting
   196  		blocks, txs = 0, 0
   197  	)
   198  	for chanDelivery := range hashesCh {
   199  		// Push the delivery into the queue and process contiguous ranges.
   200  		// Since we iterate in reverse, so lower numbers have lower prio, and
   201  		// we can use the number directly as prio marker
   202  		queue.Push(chanDelivery, int64(chanDelivery.number))
   203  		for !queue.Empty() {
   204  			// If the next available item is gapped, return
   205  			if _, priority := queue.Peek(); priority != int64(lastNum-1) {
   206  				break
   207  			}
   208  			// For testing
   209  			if hook != nil && !hook(lastNum-1) {
   210  				break
   211  			}
   212  			// Next block available, pop it off and index it
   213  			delivery := queue.PopItem().(*blockTxHashes)
   214  			lastNum = delivery.number
   215  			WriteTxLookupEntries(batch, delivery.number, delivery.hashes)
   216  			blocks++
   217  			txs += len(delivery.hashes)
   218  			// If enough data was accumulated in memory or we're at the last block, dump to disk
   219  			if batch.ValueSize() > ethdb.IdealBatchSize {
   220  				WriteTxIndexTail(batch, lastNum) // Also write the tail here
   221  				if err := batch.Write(); err != nil {
   222  					log.Crit("Failed writing batch to db", "error", err)
   223  					return
   224  				}
   225  				batch.Reset()
   226  			}
   227  			// If we've spent too much time already, notify the user of what we're doing
   228  			if time.Since(logged) > 8*time.Second {
   229  				log.Info("Indexing transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start)))
   230  				logged = time.Now()
   231  			}
   232  		}
   233  	}
   234  	// Flush the new indexing tail and the last committed data. It can also happen
   235  	// that the last batch is empty because nothing to index, but the tail has to
   236  	// be flushed anyway.
   237  	WriteTxIndexTail(batch, lastNum)
   238  	if err := batch.Write(); err != nil {
   239  		log.Crit("Failed writing batch to db", "error", err)
   240  		return
   241  	}
   242  	select {
   243  	case <-interrupt:
   244  		log.Debug("Transaction indexing interrupted", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start)))
   245  	default:
   246  		log.Info("Indexed transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start)))
   247  	}
   248  }
   249  
   250  // IndexTransactions creates txlookup indices of the specified block range. The from
   251  // is included while to is excluded.
   252  //
   253  // This function iterates canonical chain in reverse order, it has one main advantage:
   254  // We can write tx index tail flag periodically even without the whole indexing
   255  // procedure is finished. So that we can resume indexing procedure next time quickly.
   256  //
   257  // There is a passed channel, the whole procedure will be interrupted if any
   258  // signal received.
   259  func IndexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}) {
   260  	indexTransactions(db, from, to, interrupt, nil)
   261  }
   262  
// indexTransactionsForTesting is the internal debug version of
// IndexTransactions. It forwards all arguments to indexTransactions,
// including the additional per-block hook that the exported variant
// leaves nil.
func indexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
	indexTransactions(db, from, to, interrupt, hook)
}
   267  
   268  // unindexTransactions removes txlookup indices of the specified block range.
   269  //
   270  // There is a passed channel, the whole procedure will be interrupted if any
   271  // signal received.
   272  func unindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
   273  	// short circuit for invalid range
   274  	if from >= to {
   275  		return
   276  	}
   277  	var (
   278  		hashesCh = iterateTransactions(db, from, to, false, interrupt)
   279  		batch    = db.NewBatch()
   280  		start    = time.Now()
   281  		logged   = start.Add(-7 * time.Second)
   282  		// we expect the first number to come in to be [from]. Therefore, setting
   283  		// nextNum to from means that the prqueue gap-evaluation will work correctly
   284  		nextNum = from
   285  		queue   = prque.New(nil)
   286  		// for stats reporting
   287  		blocks, txs = 0, 0
   288  	)
   289  	// Otherwise spin up the concurrent iterator and unindexer
   290  	for delivery := range hashesCh {
   291  		// Push the delivery into the queue and process contiguous ranges.
   292  		queue.Push(delivery, -int64(delivery.number))
   293  		for !queue.Empty() {
   294  			// If the next available item is gapped, return
   295  			if _, priority := queue.Peek(); -priority != int64(nextNum) {
   296  				break
   297  			}
   298  			// For testing
   299  			if hook != nil && !hook(nextNum) {
   300  				break
   301  			}
   302  			delivery := queue.PopItem().(*blockTxHashes)
   303  			nextNum = delivery.number + 1
   304  			DeleteTxLookupEntries(batch, delivery.hashes)
   305  			txs += len(delivery.hashes)
   306  			blocks++
   307  
   308  			// If enough data was accumulated in memory or we're at the last block, dump to disk
   309  			// A batch counts the size of deletion as '1', so we need to flush more
   310  			// often than that.
   311  			if blocks%1000 == 0 {
   312  				WriteTxIndexTail(batch, nextNum)
   313  				if err := batch.Write(); err != nil {
   314  					log.Crit("Failed writing batch to db", "error", err)
   315  					return
   316  				}
   317  				batch.Reset()
   318  			}
   319  			// If we've spent too much time already, notify the user of what we're doing
   320  			if time.Since(logged) > 8*time.Second {
   321  				log.Info("Unindexing transactions", "blocks", blocks, "txs", txs, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start)))
   322  				logged = time.Now()
   323  			}
   324  		}
   325  	}
   326  	// Flush the new indexing tail and the last committed data. It can also happen
   327  	// that the last batch is empty because nothing to unindex, but the tail has to
   328  	// be flushed anyway.
   329  	WriteTxIndexTail(batch, nextNum)
   330  	if err := batch.Write(); err != nil {
   331  		log.Crit("Failed writing batch to db", "error", err)
   332  		return
   333  	}
   334  	select {
   335  	case <-interrupt:
   336  		log.Debug("Transaction unindexing interrupted", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start)))
   337  	default:
   338  		log.Info("Unindexed transactions", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start)))
   339  	}
   340  }
   341  
   342  // UnindexTransactions removes txlookup indices of the specified block range.
   343  // The from is included while to is excluded.
   344  //
   345  // There is a passed channel, the whole procedure will be interrupted if any
   346  // signal received.
   347  func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}) {
   348  	unindexTransactions(db, from, to, interrupt, nil)
   349  }
   350  
// unindexTransactionsForTesting is the internal debug version of
// UnindexTransactions. It forwards all arguments to unindexTransactions,
// including the additional per-block hook that the exported variant
// leaves nil.
func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
	unindexTransactions(db, from, to, interrupt, hook)
}