github.com/intfoundation/intchain@v0.0.0-20220727031208-4316ad31ca73/core/datareduction/prune_processor.go (about)

     1  package datareduction
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"github.com/intfoundation/intchain/common"
     7  	"github.com/intfoundation/intchain/core"
     8  	"github.com/intfoundation/intchain/core/rawdb"
     9  	"github.com/intfoundation/intchain/core/state"
    10  	"github.com/intfoundation/intchain/intdb"
    11  	"github.com/intfoundation/intchain/log"
    12  	"github.com/intfoundation/intchain/rlp"
    13  	"github.com/intfoundation/intchain/trie"
    14  	"sort"
    15  	"sync/atomic"
    16  	"time"
    17  )
    18  
    19  var (
    20  	// max scan trie height
    21  	max_count_trie uint64 = 1000
    22  	// max retain trie height
    23  	max_remain_trie uint64 = 1000
    24  	// emptyRoot is the known root hash of an empty trie.
    25  	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
    26  
    27  	pruning int32 // indicate pruning is running or not
    28  )
    29  
    30  type NodeCount map[common.Hash]uint64
    31  
    32  type PruneProcessor struct {
    33  	db      intdb.Database // Low level persistent database to store prune counting statistics
    34  	prunedb PruneDatabase
    35  
    36  	bc      *core.BlockChain
    37  	chainDb intdb.Database // database instance to delete the state/block data
    38  
    39  	pruneBodyData bool
    40  
    41  	nodeCount NodeCount
    42  }
    43  
    44  type PruneStatus struct {
    45  	Running           bool   `json:"is_running"`
    46  	LatestBlockNumber uint64 `json:"latest_block_number"`
    47  	LatestScanNumber  uint64 `json:"latest_scan_number"`
    48  	LatestPruneNumber uint64 `json:"latest_prune_number"`
    49  }
    50  
    51  type processLeafTrie func(addr common.Address, account state.Account)
    52  
    53  func StartPruning() bool {
    54  	return atomic.CompareAndSwapInt32(&pruning, 0, 1)
    55  }
    56  
    57  func StopPruning() bool {
    58  	return atomic.CompareAndSwapInt32(&pruning, 1, 0)
    59  }
    60  
    61  func NewPruneProcessor(chaindb, prunedb intdb.Database, bc *core.BlockChain, pruneBodyData bool) *PruneProcessor {
    62  	return &PruneProcessor{
    63  		db:            prunedb,
    64  		prunedb:       NewDatabase(prunedb),
    65  		bc:            bc,
    66  		chainDb:       chaindb,
    67  		pruneBodyData: pruneBodyData,
    68  		nodeCount:     make(NodeCount),
    69  	}
    70  }
    71  
    72  func (p *PruneProcessor) Process(blockNumber, scanNumber, pruneNumber uint64) (uint64, uint64) {
    73  
    74  	var needScan bool
    75  	var scanStart, scanEnd uint64
    76  	for {
    77  		// Step 1. determine the scan height
    78  		needScan, scanStart, scanEnd = calculateScan(scanNumber, blockNumber)
    79  
    80  		log.Infof("Data Reduction - scan ? %v , %d - %d", needScan, scanStart, scanEnd)
    81  
    82  		if needScan {
    83  
    84  			// Step 2. Read Latest Node Count
    85  			pruneBodyStart := uint64(0)
    86  
    87  			if pruneNumber > 0 {
    88  
    89  				pruneBodyStart = pruneNumber + 1
    90  
    91  				// Add previous state root for prune
    92  				for i := pruneNumber + 1; i <= scanNumber; i++ {
    93  					header := p.bc.GetHeaderByNumber(i)
    94  					p.countBlockChainTrie(header.Root, false)
    95  					log.Infof("countBlockChainTrie for block %d", i)
    96  				}
    97  			} else {
    98  				pruneBodyStart = scanStart
    99  			}
   100  
   101  			for i := scanStart; i <= scanEnd+1; i++ {
   102  
   103  				//TODO Cache the header
   104  				header := p.bc.GetHeaderByNumber(i)
   105  				p.countBlockChainTrie(header.Root, false)
   106  
   107  				//log.Printf("Block: %v, Root %x", i, header.Root)
   108  				if i%max_count_trie == max_count_trie-1 || i == scanEnd {
   109  
   110  					p.bc.MuLock()
   111  
   112  					header := p.bc.CurrentBlock().Header()
   113  					log.Infof("lastest block number is %v\n", header.Number.Uint64())
   114  					p.countBlockChainTrie(header.Root, true)
   115  					p.processScanData(i)
   116  
   117  					p.bc.MuUnLock()
   118  
   119  					if p.pruneBodyData {
   120  						for j := pruneBodyStart; j <= i; j++ {
   121  							rawdb.DeleteBody(p.chainDb, rawdb.ReadCanonicalHash(p.chainDb, i), i)
   122  						}
   123  						log.Infof("deleted block from %v to %v", pruneBodyStart, i-1)
   124  						pruneBodyStart = i
   125  					}
   126  				}
   127  				//log.Infof("countBlockChainTrie for block %d", i)
   128  			}
   129  
   130  			blockNumber = p.bc.CurrentBlock().NumberU64()
   131  			scanNumber = scanEnd + 1
   132  			pruneNumber = scanEnd
   133  
   134  		} else {
   135  			time.Sleep(5 * time.Second) //sleep 5 seconds to wait more blocks to prune
   136  			blockNumber = p.bc.CurrentBlock().NumberU64()
   137  		}
   138  	}
   139  
   140  	return scanEnd + 1, scanEnd
   141  }
   142  
   143  func calculateScan(scan, latestBlockHeight uint64) (scanOrNot bool, from, to uint64) {
   144  
   145  	from = scan
   146  	to = 0
   147  
   148  	unscanHeight := latestBlockHeight - scan
   149  	if unscanHeight > max_remain_trie {
   150  		to = latestBlockHeight - max_remain_trie
   151  	}
   152  
   153  	if to != 0 {
   154  		scanOrNot = true
   155  	}
   156  
   157  	return
   158  }
   159  
   160  func (p *PruneProcessor) readLatestNodeCount(scanNumber, pruneNumber uint64) NodeCount {
   161  	nodeCount := make(NodeCount)
   162  
   163  	lastHash := rawdb.ReadDataPruneTrieRootHash(p.db, scanNumber, pruneNumber)
   164  	if (lastHash != common.Hash{}) {
   165  		lastPruneTrie, openErr := p.prunedb.OpenPruneTrie(lastHash)
   166  		if openErr != nil {
   167  			log.Error("Data Reduction - Unable read the last Prune Trie.", "err", openErr)
   168  		} else {
   169  			it := trie.NewIterator(lastPruneTrie.NodeIterator(nil))
   170  			for it.Next() {
   171  				nodeHash := common.BytesToHash(lastPruneTrie.GetKey(it.Key))
   172  				var nodeHashCount uint64
   173  				rlp.DecodeBytes(it.Value, &nodeHashCount)
   174  				nodeCount[nodeHash] = nodeHashCount
   175  			}
   176  		}
   177  	}
   178  	return nodeCount
   179  }
   180  
   181  func (p *PruneProcessor) countBlockChainTrie(root common.Hash, markNoPrune bool) (skip bool) {
   182  	t, openErr := p.bc.StateCache().OpenTrie(root)
   183  	if openErr != nil {
   184  		if _, ok := openErr.(*trie.MissingNodeError); ok {
   185  			// Missing Node Error means the root node of the trie has been removed earlier, so skip the trie and return
   186  			skip = true
   187  		} else {
   188  			log.Error("Data Reduction - Error when open the Main Trie", "err", openErr, "stateroot", root)
   189  		}
   190  		return
   191  	}
   192  
   193  	countTrie(t, p.nodeCount, markNoPrune, func(addr common.Address, account state.Account) {
   194  		if account.Root != emptyRoot {
   195  			if storageTrie, stErr := p.bc.StateCache().OpenStorageTrie(common.Hash{}, account.Root); stErr == nil {
   196  				countTrie(storageTrie, p.nodeCount, markNoPrune, nil)
   197  			} else {
   198  				log.Error("Data Reduction - Error when open the Storage Trie", "err", stErr, "storageroot", account.Root, "account", addr)
   199  			}
   200  		}
   201  
   202  		if account.TX1Root != emptyRoot {
   203  			if tx1Trie, tx1Err := p.bc.StateCache().OpenTX1Trie(common.Hash{}, account.TX1Root); tx1Err == nil {
   204  				countTrie(tx1Trie, p.nodeCount, markNoPrune, nil)
   205  			} else {
   206  				log.Error("Data Reduction - Error when open the TX1 Trie", "err", tx1Err, "tx1root", account.TX1Root, "account", addr)
   207  			}
   208  		}
   209  
   210  		if account.TX3Root != emptyRoot {
   211  			if tx3Trie, tx3Err := p.bc.StateCache().OpenTX3Trie(common.Hash{}, account.TX3Root); tx3Err == nil {
   212  				countTrie(tx3Trie, p.nodeCount, markNoPrune, nil)
   213  			} else {
   214  				log.Error("Data Reduction - Error when open the TX3 Trie", "err", tx3Err, "tx3root", account.TX3Root, "account", addr)
   215  			}
   216  		}
   217  
   218  		if account.ProxiedRoot != emptyRoot {
   219  			if proxiedTrie, proxiedErr := p.bc.StateCache().OpenProxiedTrie(common.Hash{}, account.ProxiedRoot); proxiedErr == nil {
   220  				countTrie(proxiedTrie, p.nodeCount, markNoPrune, nil)
   221  			} else {
   222  				log.Error("Data Reduction - Error when open the Proxied Trie", "err", proxiedErr, "proxiedroot", account.ProxiedRoot, "account", addr)
   223  			}
   224  		}
   225  
   226  		if account.RewardRoot != emptyRoot {
   227  			if rewardTrie, rewardErr := p.bc.StateCache().OpenRewardTrie(common.Hash{}, account.RewardRoot); rewardErr == nil {
   228  				countTrie(rewardTrie, p.nodeCount, markNoPrune, nil)
   229  			} else {
   230  				log.Error("Data Reduction - Error when open the Reward Trie", "err", rewardErr, "rewardroot", account.RewardRoot, "account", addr)
   231  			}
   232  		}
   233  	})
   234  	return
   235  }
   236  
   237  func countTrie(t state.Trie, nodeCount NodeCount, markNoPrune bool, processLeaf processLeafTrie) {
   238  
   239  	child := true
   240  	if !markNoPrune {
   241  		for it := t.NodeIterator(nil); it.Next(child); {
   242  			if !it.Leaf() {
   243  				nodeHash := it.Hash()
   244  				if _, exist := nodeCount[nodeHash]; exist {
   245  					child = false
   246  				} else {
   247  					nodeCount[nodeHash] = 0 //this node occurs, may need prune
   248  					child = true
   249  				}
   250  			} else {
   251  				// Process the Account -> Inner Trie
   252  				if processLeaf != nil {
   253  					addr := t.GetKey(it.LeafKey())
   254  					if len(addr) == 20 {
   255  						var data state.Account
   256  						rlp.DecodeBytes(it.LeafBlob(), &data)
   257  
   258  						processLeaf(common.BytesToAddress(addr), data)
   259  					}
   260  				}
   261  			}
   262  		}
   263  	} else {
   264  		for it := t.NodeIterator(nil); it.Next(child); {
   265  			if !it.Leaf() {
   266  				nodeHash := it.Hash()
   267  				nodeCount[nodeHash] = 1 //this node occurs in the latest block, mark no prune
   268  			} else {
   269  				// Process the Account -> Inner Trie
   270  				if processLeaf != nil {
   271  					addr := t.GetKey(it.LeafKey())
   272  					if len(addr) == 20 {
   273  						var data state.Account
   274  						rlp.DecodeBytes(it.LeafBlob(), &data)
   275  
   276  						processLeaf(common.BytesToAddress(addr), data)
   277  					}
   278  				}
   279  			}
   280  		}
   281  	}
   282  }
   283  
   284  func (p *PruneProcessor) processScanData(latestScanNumber uint64) uint64 {
   285  
   286  	log.Infof("Data Reduction - After Scan, lastest scan number: %d", latestScanNumber)
   287  
   288  	// Prune State Data
   289  	p.pruneData()
   290  
   291  	newPruneNumber := latestScanNumber
   292  
   293  	// Commit the new scaned/pruned node count to trie
   294  	p.writeLastNumber(latestScanNumber, newPruneNumber)
   295  
   296  	log.Infof("Data Reduction - Scan/Prune Completed for trie %d %d", latestScanNumber, newPruneNumber)
   297  	return newPruneNumber
   298  }
   299  
   300  func (p *PruneProcessor) pruneData() {
   301  
   302  	count := 0
   303  
   304  	batch := p.chainDb.NewBatch()
   305  	for node, latest := range p.nodeCount {
   306  		if latest == 0 {
   307  			if batchDeleteError := batch.Delete(node.Bytes()); batchDeleteError != nil {
   308  				log.Error("Data Reduction - Error when delete the hash from chaindb", "err", batchDeleteError, "hash", node)
   309  			}
   310  			delete(p.nodeCount, node)
   311  			count++
   312  		} else {
   313  			p.nodeCount[node] = 0
   314  		}
   315  	}
   316  
   317  	log.Infof("Data Reduction - %d hashes will be deleted from chaindb", count)
   318  	if writeErr := batch.Write(); writeErr != nil {
   319  		log.Error("Data Reduction - Error when write the deletion batch", "err", writeErr)
   320  	} else {
   321  		log.Infof("Data Reduction - write the deletion batch success, delete %v hashes", count)
   322  	}
   323  }
   324  
   325  func (p *PruneProcessor) writeLastNumber(lastScanNumber, lastPruneNumber uint64) {
   326  	rawdb.WriteHeadScanNumber(p.db, lastScanNumber)
   327  	rawdb.WriteHeadPruneNumber(p.db, lastPruneNumber)
   328  }
   329  
   330  func (nc NodeCount) String() string {
   331  	list := make([]common.Hash, 0, len(nc))
   332  	for key := range nc {
   333  		list = append(list, key)
   334  	}
   335  	sort.Slice(list, func(i, j int) bool {
   336  		return bytes.Compare(list[i].Bytes(), list[j].Bytes()) == 1
   337  	})
   338  
   339  	result := ""
   340  	for _, key := range list {
   341  		result += fmt.Sprintf("%v: %d \n", key.Hex(), nc[key])
   342  	}
   343  	return result
   344  }
   345  
   346  func GetLatestStatus(prunedb intdb.Database) *PruneStatus {
   347  	var scanNo, pruneNo uint64
   348  	if ps := rawdb.ReadHeadScanNumber(prunedb); ps != nil {
   349  		scanNo = *ps
   350  	}
   351  	if pp := rawdb.ReadHeadPruneNumber(prunedb); pp != nil {
   352  		pruneNo = *pp
   353  	}
   354  
   355  	return &PruneStatus{
   356  		Running:           atomic.LoadInt32(&pruning) == 1,
   357  		LatestScanNumber:  scanNo,
   358  		LatestPruneNumber: pruneNo,
   359  	}
   360  }
   361  
   362  /*
   363  func (p *PruneProcessor) pruneBlockChainTrie(root common.Hash, nodeCount NodeCount) {
   364  	t, openErr := p.bc.StateCache().OpenTrie(root)
   365  	if openErr != nil {
   366  		log.Error("Data Reduction - Error when open the Main Trie", "err", openErr, "stateroot", root)
   367  		return
   368  	}
   369  
   370  	pruneTrie(t, nodeCount, &p.pendingDeleteHashList, func(addr common.Address, account state.Account) {
   371  		if account.Root != emptyRoot {
   372  			if storageTrie, stErr := p.bc.StateCache().OpenStorageTrie(common.Hash{}, account.Root); stErr == nil {
   373  				pruneTrie(storageTrie, nodeCount, &p.pendingDeleteHashList, nil)
   374  			} else {
   375  				log.Error("Data Reduction - Error when open the Storage Trie", "err", stErr, "storageroot", account.Root, "account", addr)
   376  			}
   377  		}
   378  
   379  		if account.TX1Root != emptyRoot {
   380  			if tx1Trie, tx1Err := p.bc.StateCache().OpenTX1Trie(common.Hash{}, account.TX1Root); tx1Err == nil {
   381  				pruneTrie(tx1Trie, nodeCount, &p.pendingDeleteHashList, nil)
   382  			} else {
   383  				log.Error("Data Reduction - Error when open the TX1 Trie", "err", tx1Err, "tx1root", account.TX1Root, "account", addr)
   384  			}
   385  		}
   386  
   387  		if account.TX3Root != emptyRoot {
   388  			if tx3Trie, tx3Err := p.bc.StateCache().OpenTX3Trie(common.Hash{}, account.TX3Root); tx3Err == nil {
   389  				pruneTrie(tx3Trie, nodeCount, &p.pendingDeleteHashList, nil)
   390  			} else {
   391  				log.Error("Data Reduction - Error when open the TX3 Trie", "err", tx3Err, "tx3root", account.TX3Root, "account", addr)
   392  			}
   393  		}
   394  
   395  		if account.ProxiedRoot != emptyRoot {
   396  			if proxiedTrie, proxiedErr := p.bc.StateCache().OpenProxiedTrie(common.Hash{}, account.ProxiedRoot); proxiedErr == nil {
   397  				pruneTrie(proxiedTrie, nodeCount, &p.pendingDeleteHashList, nil)
   398  			} else {
   399  				log.Error("Data Reduction - Error when open the Proxied Trie", "err", proxiedErr, "proxiedroot", account.ProxiedRoot, "account", addr)
   400  			}
   401  		}
   402  
   403  		if account.RewardRoot != emptyRoot {
   404  			if rewardTrie, rewardErr := p.bc.StateCache().OpenRewardTrie(common.Hash{}, account.RewardRoot); rewardErr == nil {
   405  				pruneTrie(rewardTrie, nodeCount, &p.pendingDeleteHashList, nil)
   406  			} else {
   407  				log.Error("Data Reduction - Error when open the Reward Trie", "err", rewardErr, "rewardroot", account.RewardRoot, "account", addr)
   408  			}
   409  		}
   410  	})
   411  
   412  }
   413  
   414  func pruneTrie(t state.Trie, nodeCount NodeCount, pendingDeleteHashList *[]common.Hash, processLeaf processLeafTrie) {
   415  	child := true
   416  	for it := t.NodeIterator(nil); it.Next(child); {
   417  		if !it.Leaf() {
   418  			nodeHash := it.Hash()
   419  			if nodeCount[nodeHash] > 0 {
   420  				nodeCount[nodeHash]--
   421  			}
   422  
   423  			if nodeCount[nodeHash] == 0 {
   424  				child = true
   425  				*pendingDeleteHashList = append(*pendingDeleteHashList, nodeHash)
   426  				delete(nodeCount, nodeHash)
   427  			} else {
   428  				child = false
   429  			}
   430  		} else {
   431  			// Process the Account -> Inner Trie
   432  			if processLeaf != nil {
   433  				addr := t.GetKey(it.LeafKey())
   434  				if len(addr) == 20 {
   435  					var data state.Account
   436  					rlp.DecodeBytes(it.LeafBlob(), &data)
   437  
   438  					processLeaf(common.BytesToAddress(addr), data)
   439  				}
   440  			}
   441  		}
   442  	}
   443  }
   444  
   445  func (p *PruneProcessor) commitDataPruneTrie(nodeCount NodeCount, lastScanNumber, lastPruneNumber uint64) {
   446  	// Store the Node Count into data prune trie
   447  	// Commit the Prune Trie
   448  	pruneTrie, _ := p.prunedb.OpenPruneTrie(common.Hash{})
   449  
   450  	for key, count := range nodeCount {
   451  		value, _ := rlp.EncodeToBytes(count)
   452  		pruneTrie.TryUpdate(key[:], value)
   453  	}
   454  	pruneTrieRoot, commit_err := pruneTrie.Commit(nil)
   455  	log.Info("Data Reduction - Commit Prune Trie", "hash", pruneTrieRoot.Hex(), "err", commit_err)
   456  	// Commit to Prune DB
   457  	db_commit_err := p.prunedb.TrieDB().Commit(pruneTrieRoot, true)
   458  	log.Info("Data Reduction - Write to Prune DB", "err", db_commit_err)
   459  
   460  	// Write the Root Hash of Prune Trie
   461  	rawdb.WriteDataPruneTrieRootHash(p.db, pruneTrieRoot, lastScanNumber, lastPruneNumber)
   462  	// Write the last number
   463  	p.writeLastNumber(lastScanNumber, lastPruneNumber)
   464  }
   465  */