
     1  // Copyright 2019 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    17  package rawdb
    19  import (
    20  	"runtime"
    21  	"sync/atomic"
    22  	"time"
    24  	""
    25  	""
    26  	""
    27  	""
    28  	""
    29  	""
    30  )
    32  // InitDatabaseFromFreezer reinitializes an empty database from a previous batch
    33  // of frozen ancient blocks. The method iterates over all the frozen blocks and
    34  // injects into the database the block hash->number mappings.
    35  func InitDatabaseFromFreezer(db ethdb.Database) {
    36  	// If we can't access the freezer or it's empty, abort
    37  	frozen, err := db.Ancients()
    38  	if err != nil || frozen == 0 {
    39  		return
    40  	}
    41  	var (
    42  		batch  = db.NewBatch()
    43  		start  = time.Now()
    44  		logged = start.Add(-7 * time.Second) // Unindex during import is fast, don't double log
    45  		hash   common.Hash
    46  	)
    47  	for i := uint64(0); i < frozen; i++ {
    48  		// Since the freezer has all data in sequential order on a file,
    49  		// it would be 'neat' to read more data in one go, and let the
    50  		// freezerdb return N items (e.g up to 1000 items per go)
    51  		// That would require an API change in Ancients though
    52  		if h, err := db.Ancient(freezerHashTable, i); err != nil {
    53  			log.Crit("Failed to init database from freezer", "err", err)
    54  		} else {
    55  			hash = common.BytesToHash(h)
    56  		}
    57  		WriteHeaderNumber(batch, hash, i)
    58  		// If enough data was accumulated in memory or we're at the last block, dump to disk
    59  		if batch.ValueSize() > ethdb.IdealBatchSize {
    60  			if err := batch.Write(); err != nil {
    61  				log.Crit("Failed to write data to db", "err", err)
    62  			}
    63  			batch.Reset()
    64  		}
    65  		// If we've spent too much time already, notify the user of what we're doing
    66  		if time.Since(logged) > 8*time.Second {
    67  			log.Info("Initializing database from freezer", "total", frozen, "number", i, "hash", hash, "elapsed", common.PrettyDuration(time.Since(start)))
    68  			logged = time.Now()
    69  		}
    70  	}
    71  	if err := batch.Write(); err != nil {
    72  		log.Crit("Failed to write data to db", "err", err)
    73  	}
    74  	batch.Reset()
    76  	WriteHeadHeaderHash(db, hash)
    77  	WriteHeadFastBlockHash(db, hash)
    78  	log.Info("Initialized database from freezer", "blocks", frozen, "elapsed", common.PrettyDuration(time.Since(start)))
    79  }
// blockTxHashes pairs a block number with the hashes of the transactions found
// in that block's body. It is the item type delivered on the channel returned
// by iterateTransactions.
type blockTxHashes struct {
	// number is the number of the block the hashes were read from.
	number uint64
	// hashes holds one hash per transaction, in the order the transactions
	// appear in the block body.
	hashes []common.Hash
}
    86  // iterateTransactions iterates over all transactions in the (canon) block
    87  // number(s) given, and yields the hashes on a channel. If there is a signal
    88  // received from interrupt channel, the iteration will be aborted and result
    89  // channel will be closed.
    90  func iterateTransactions(db ethdb.Database, from uint64, to uint64, reverse bool, interrupt chan struct{}) chan *blockTxHashes {
    91  	// One thread sequentially reads data from db
    92  	type numberRlp struct {
    93  		number uint64
    94  		rlp    rlp.RawValue
    95  	}
    96  	if to == from {
    97  		return nil
    98  	}
    99  	threads := to - from
   100  	if cpus := runtime.NumCPU(); threads > uint64(cpus) {
   101  		threads = uint64(cpus)
   102  	}
   103  	var (
   104  		rlpCh    = make(chan *numberRlp, threads*2)     // we send raw rlp over this channel
   105  		hashesCh = make(chan *blockTxHashes, threads*2) // send hashes over hashesCh
   106  	)
   107  	// lookup runs in one instance
   108  	lookup := func() {
   109  		n, end := from, to
   110  		if reverse {
   111  			n, end = to-1, from-1
   112  		}
   113  		defer close(rlpCh)
   114  		for n != end {
   115  			data := ReadCanonicalBodyRLP(db, n)
   116  			// Feed the block to the aggregator, or abort on interrupt
   117  			select {
   118  			case rlpCh <- &numberRlp{n, data}:
   119  			case <-interrupt:
   120  				return
   121  			}
   122  			if reverse {
   123  				n--
   124  			} else {
   125  				n++
   126  			}
   127  		}
   128  	}
   129  	// process runs in parallel
   130  	nThreadsAlive := int32(threads)
   131  	process := func() {
   132  		defer func() {
   133  			// Last processor closes the result channel
   134  			if atomic.AddInt32(&nThreadsAlive, -1) == 0 {
   135  				close(hashesCh)
   136  			}
   137  		}()
   139  		var hasher = sha3.NewLegacyKeccak256()
   140  		for data := range rlpCh {
   141  			it, err := rlp.NewListIterator(data.rlp)
   142  			if err != nil {
   143  				log.Warn("tx iteration error", "error", err)
   144  				return
   145  			}
   146  			it.Next()
   147  			txs := it.Value()
   148  			txIt, err := rlp.NewListIterator(txs)
   149  			if err != nil {
   150  				log.Warn("tx iteration error", "error", err)
   151  				return
   152  			}
   153  			var hashes []common.Hash
   154  			for txIt.Next() {
   155  				if err := txIt.Err(); err != nil {
   156  					log.Warn("tx iteration error", "error", err)
   157  					return
   158  				}
   159  				var txHash common.Hash
   160  				hasher.Reset()
   161  				hasher.Write(txIt.Value())
   162  				hasher.Sum(txHash[:0])
   163  				hashes = append(hashes, txHash)
   164  			}
   165  			result := &blockTxHashes{
   166  				hashes: hashes,
   167  				number: data.number,
   168  			}
   169  			// Feed the block to the aggregator, or abort on interrupt
   170  			select {
   171  			case hashesCh <- result:
   172  			case <-interrupt:
   173  				return
   174  			}
   175  		}
   176  	}
   177  	go lookup() // start the sequential db accessor
   178  	for i := 0; i < int(threads); i++ {
   179  		go process()
   180  	}
   181  	return hashesCh
   182  }
   184  // indexTransactions creates txlookup indices of the specified block range.
   185  //
   186  // This function iterates canonical chain in reverse order, it has one main advantage:
   187  // We can write tx index tail flag periodically even without the whole indexing
   188  // procedure is finished. So that we can resume indexing procedure next time quickly.
   189  //
   190  // There is a passed channel, the whole procedure will be interrupted if any
   191  // signal received.
   192  func indexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
   193  	// short circuit for invalid range
   194  	if from >= to {
   195  		return
   196  	}
   197  	var (
   198  		hashesCh = iterateTransactions(db, from, to, true, interrupt)
   199  		batch    = db.NewBatch()
   200  		start    = time.Now()
   201  		logged   = start.Add(-7 * time.Second)
   202  		// Since we iterate in reverse, we expect the first number to come
   203  		// in to be [to-1]. Therefore, setting lastNum to means that the
   204  		// prqueue gap-evaluation will work correctly
   205  		lastNum = to
   206  		queue   = prque.New(nil)
   207  		// for stats reporting
   208  		blocks, txs = 0, 0
   209  	)
   210  	for chanDelivery := range hashesCh {
   211  		// Push the delivery into the queue and process contiguous ranges.
   212  		// Since we iterate in reverse, so lower numbers have lower prio, and
   213  		// we can use the number directly as prio marker
   214  		queue.Push(chanDelivery, int64(chanDelivery.number))
   215  		for !queue.Empty() {
   216  			// If the next available item is gapped, return
   217  			if _, priority := queue.Peek(); priority != int64(lastNum-1) {
   218  				break
   219  			}
   220  			// For testing
   221  			if hook != nil && !hook(lastNum-1) {
   222  				break
   223  			}
   224  			// Next block available, pop it off and index it
   225  			delivery := queue.PopItem().(*blockTxHashes)
   226  			lastNum = delivery.number
   227  			WriteTxLookupEntries(batch, delivery.number, delivery.hashes)
   228  			blocks++
   229  			txs += len(delivery.hashes)
   230  			// If enough data was accumulated in memory or we're at the last block, dump to disk
   231  			if batch.ValueSize() > ethdb.IdealBatchSize {
   232  				WriteTxIndexTail(batch, lastNum) // Also write the tail here
   233  				if err := batch.Write(); err != nil {
   234  					log.Crit("Failed writing batch to db", "error", err)
   235  					return
   236  				}
   237  				batch.Reset()
   238  			}
   239  			// If we've spent too much time already, notify the user of what we're doing
   240  			if time.Since(logged) > 8*time.Second {
   241  				log.Info("Indexing transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start)))
   242  				logged = time.Now()
   243  			}
   244  		}
   245  	}
   246  	// Flush the new indexing tail and the last committed data. It can also happen
   247  	// that the last batch is empty because nothing to index, but the tail has to
   248  	// be flushed anyway.
   249  	WriteTxIndexTail(batch, lastNum)
   250  	if err := batch.Write(); err != nil {
   251  		log.Crit("Failed writing batch to db", "error", err)
   252  		return
   253  	}
   254  	select {
   255  	case <-interrupt:
   256  		log.Debug("Transaction indexing interrupted", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start)))
   257  	default:
   258  		log.Info("Indexed transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start)))
   259  	}
   260  }
// IndexTransactions creates txlookup indices for the canonical blocks in the
// range [from, to).
//
// The canonical chain is walked in reverse order, which has one main
// advantage: the tx index tail flag can be written periodically before the
// whole indexing procedure has finished, so that indexing can be resumed
// quickly next time.
//
// The whole procedure is interrupted if any signal is received on the passed
// interrupt channel. This is the exported entry point; it runs the internal
// implementation without a test hook.
func IndexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}) {
	indexTransactions(db, from, to, interrupt, nil)
}
// indexTransactionsForTesting is the internal debug version of
// IndexTransactions with an additional hook. The hook is forwarded unchanged
// to indexTransactions, which calls it with the number of each block before
// indexing it and stops draining queued blocks when it returns false.
func indexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
	indexTransactions(db, from, to, interrupt, hook)
}
   279  // unindexTransactions removes txlookup indices of the specified block range.
   280  //
   281  // There is a passed channel, the whole procedure will be interrupted if any
   282  // signal received.
   283  func unindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
   284  	// short circuit for invalid range
   285  	if from >= to {
   286  		return
   287  	}
   288  	var (
   289  		hashesCh = iterateTransactions(db, from, to, false, interrupt)
   290  		batch    = db.NewBatch()
   291  		start    = time.Now()
   292  		logged   = start.Add(-7 * time.Second)
   293  		// we expect the first number to come in to be [from]. Therefore, setting
   294  		// nextNum to from means that the prqueue gap-evaluation will work correctly
   295  		nextNum = from
   296  		queue   = prque.New(nil)
   297  		// for stats reporting
   298  		blocks, txs = 0, 0
   299  	)
   300  	// Otherwise spin up the concurrent iterator and unindexer
   301  	for delivery := range hashesCh {
   302  		// Push the delivery into the queue and process contiguous ranges.
   303  		queue.Push(delivery, -int64(delivery.number))
   304  		for !queue.Empty() {
   305  			// If the next available item is gapped, return
   306  			if _, priority := queue.Peek(); -priority != int64(nextNum) {
   307  				break
   308  			}
   309  			// For testing
   310  			if hook != nil && !hook(nextNum) {
   311  				break
   312  			}
   313  			delivery := queue.PopItem().(*blockTxHashes)
   314  			nextNum = delivery.number + 1
   315  			DeleteTxLookupEntries(batch, delivery.hashes)
   316  			txs += len(delivery.hashes)
   317  			blocks++
   319  			// If enough data was accumulated in memory or we're at the last block, dump to disk
   320  			// A batch counts the size of deletion as '1', so we need to flush more
   321  			// often than that.
   322  			if blocks%1000 == 0 {
   323  				WriteTxIndexTail(batch, nextNum)
   324  				if err := batch.Write(); err != nil {
   325  					log.Crit("Failed writing batch to db", "error", err)
   326  					return
   327  				}
   328  				batch.Reset()
   329  			}
   330  			// If we've spent too much time already, notify the user of what we're doing
   331  			if time.Since(logged) > 8*time.Second {
   332  				log.Info("Unindexing transactions", "blocks", blocks, "txs", txs, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start)))
   333  				logged = time.Now()
   334  			}
   335  		}
   336  	}
   337  	// Flush the new indexing tail and the last committed data. It can also happen
   338  	// that the last batch is empty because nothing to unindex, but the tail has to
   339  	// be flushed anyway.
   340  	WriteTxIndexTail(batch, nextNum)
   341  	if err := batch.Write(); err != nil {
   342  		log.Crit("Failed writing batch to db", "error", err)
   343  		return
   344  	}
   345  	select {
   346  	case <-interrupt:
   347  		log.Debug("Transaction unindexing interrupted", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start)))
   348  	default:
   349  		log.Info("Unindexed transactions", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start)))
   350  	}
   351  }
// UnindexTransactions removes txlookup indices for the canonical blocks in the
// range [from, to).
//
// The whole procedure is interrupted if any signal is received on the passed
// interrupt channel. This is the exported entry point; it runs the internal
// implementation without a test hook.
func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}) {
	unindexTransactions(db, from, to, interrupt, nil)
}
// unindexTransactionsForTesting is the internal debug version of
// UnindexTransactions with an additional hook. The hook is forwarded unchanged
// to unindexTransactions, which calls it with the number of each block before
// unindexing it and stops draining queued blocks when it returns false.
func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
	unindexTransactions(db, from, to, interrupt, hook)
}