
     1  // Copyright 2020 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    17  package pruner
    19  import (
    20  	"bytes"
    21  	"encoding/binary"
    22  	"errors"
    23  	"fmt"
    24  	"math"
    25  	"os"
    26  	"path/filepath"
    27  	"strings"
    28  	"time"
    30  	""
    31  	""
    32  	""
    33  	""
    34  	""
    35  	""
    36  	""
    37  	""
    38  	""
    39  	""
    40  )
    42  const (
    43  	// stateBloomFilePrefix is the filename prefix of state bloom filter.
    44  	stateBloomFilePrefix = "statebloom"
    46  	// stateBloomFilePrefix is the filename suffix of state bloom filter.
    47  	stateBloomFileSuffix = "bf.gz"
    49  	// stateBloomFileTempSuffix is the filename suffix of state bloom filter
    50  	// while it is being written out to detect write aborts.
    51  	stateBloomFileTempSuffix = ".tmp"
    53  	// rangeCompactionThreshold is the minimal deleted entry number for
    54  	// triggering range compaction. It's a quite arbitrary number but just
    55  	// to avoid triggering range compaction because of small deletion.
    56  	rangeCompactionThreshold = 100000
    57  )
    59  var (
    60  	// emptyRoot is the known root hash of an empty trie.
    61  	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
    63  	// emptyCode is the known hash of the empty EVM bytecode.
    64  	emptyCode = crypto.Keccak256(nil)
    65  )
// Pruner is an offline tool to prune the stale state with the
// help of the snapshot. The workflow of pruner is very simple:
//
// - iterate the snapshot, reconstruct the relevant state
// - iterate the database, delete all other state entries which
//   don't belong to the target state and the genesis state
//
// It can take several hours(around 2 hours for mainnet) to finish
// the whole pruning work. It's recommended to run this offline tool
// periodically in order to release the disk usage and improve the
// disk read performance to some extent.
type Pruner struct {
	db            ethdb.Database // database that is iterated and pruned
	stateBloom    *stateBloom    // bloom filter collecting the state entries to keep
	datadir       string         // directory searched for, and receiving, the bloom filter file
	trieCachePath string         // clean trie cache location, removed before pruning starts
	headHeader    *types.Header  // header of the head block loaded in NewPruner
	snaptree      *snapshot.Tree // snapshot tree opened at the head block's state root
	triesInMemory uint64         // number of snapshot layers requested when picking the target
}
    88  // NewPruner creates the pruner instance.
    89  func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize, triesInMemory uint64) (*Pruner, error) {
    90  	headBlock := rawdb.ReadHeadBlock(db)
    91  	if headBlock == nil {
    92  		return nil, errors.New("Failed to load head block")
    93  	}
    94  	snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, int(triesInMemory), headBlock.Root(), false, false, false)
    95  	if err != nil {
    96  		return nil, err // The relevant snapshot(s) might not exist
    97  	}
    98  	// Sanitize the bloom filter size if it's too small.
    99  	if bloomSize < 256 {
   100  		log.Warn("Sanitizing bloomfilter size", "provided(MB)", bloomSize, "updated(MB)", 256)
   101  		bloomSize = 256
   102  	}
   103  	stateBloom, err := newStateBloomWithSize(bloomSize)
   104  	if err != nil {
   105  		return nil, err
   106  	}
   107  	return &Pruner{
   108  		db:            db,
   109  		stateBloom:    stateBloom,
   110  		datadir:       datadir,
   111  		trieCachePath: trieCachePath,
   112  		triesInMemory: triesInMemory,
   113  		headHeader:    headBlock.Header(),
   114  		snaptree:      snaptree,
   115  	}, nil
   116  }
   118  func prune(snaptree *snapshot.Tree, root common.Hash, maindb ethdb.Database, stateBloom *stateBloom, bloomPath string, middleStateRoots map[common.Hash]struct{}, start time.Time) error {
   119  	// Delete all stale trie nodes in the disk. With the help of state bloom
   120  	// the trie nodes(and codes) belong to the active state will be filtered
   121  	// out. A very small part of stale tries will also be filtered because of
   122  	// the false-positive rate of bloom filter. But the assumption is held here
   123  	// that the false-positive is low enough(~0.05%). The probablity of the
   124  	// dangling node is the state root is super low. So the dangling nodes in
   125  	// theory will never ever be visited again.
   126  	var (
   127  		count  int
   128  		size   common.StorageSize
   129  		pstart = time.Now()
   130  		logged = time.Now()
   131  		batch  = maindb.NewBatch()
   132  		iter   = maindb.NewIterator(nil, nil)
   133  	)
   134  	for iter.Next() {
   135  		key := iter.Key()
   137  		// All state entries don't belong to specific state and genesis are deleted here
   138  		// - trie node
   139  		// - legacy contract code
   140  		// - new-scheme contract code
   141  		isCode, codeKey := rawdb.IsCodeKey(key)
   142  		if len(key) == common.HashLength || isCode {
   143  			checkKey := key
   144  			if isCode {
   145  				checkKey = codeKey
   146  			}
   147  			if _, exist := middleStateRoots[common.BytesToHash(checkKey)]; exist {
   148  				log.Debug("Forcibly delete the middle state roots", "hash", common.BytesToHash(checkKey))
   149  			} else {
   150  				if ok, err := stateBloom.Contain(checkKey); err != nil {
   151  					return err
   152  				} else if ok {
   153  					continue
   154  				}
   155  			}
   156  			count += 1
   157  			size += common.StorageSize(len(key) + len(iter.Value()))
   158  			batch.Delete(key)
   160  			var eta time.Duration // Realistically will never remain uninited
   161  			if done := binary.BigEndian.Uint64(key[:8]); done > 0 {
   162  				var (
   163  					left  = math.MaxUint64 - binary.BigEndian.Uint64(key[:8])
   164  					speed = done/uint64(time.Since(pstart)/time.Millisecond+1) + 1 // +1s to avoid division by zero
   165  				)
   166  				eta = time.Duration(left/speed) * time.Millisecond
   167  			}
   168  			if time.Since(logged) > 8*time.Second {
   169  				log.Info("Pruning state data", "nodes", count, "size", size,
   170  					"elapsed", common.PrettyDuration(time.Since(pstart)), "eta", common.PrettyDuration(eta))
   171  				logged = time.Now()
   172  			}
   173  			// Recreate the iterator after every batch commit in order
   174  			// to allow the underlying compactor to delete the entries.
   175  			if batch.ValueSize() >= ethdb.IdealBatchSize {
   176  				batch.Write()
   177  				batch.Reset()
   179  				iter.Release()
   180  				iter = maindb.NewIterator(nil, key)
   181  			}
   182  		}
   183  	}
   184  	if batch.ValueSize() > 0 {
   185  		batch.Write()
   186  		batch.Reset()
   187  	}
   188  	iter.Release()
   189  	log.Info("Pruned state data", "nodes", count, "size", size, "elapsed", common.PrettyDuration(time.Since(pstart)))
   191  	// Pruning is done, now drop the "useless" layers from the snapshot.
   192  	// Firstly, flushing the target layer into the disk. After that all
   193  	// diff layers below the target will all be merged into the disk.
   194  	if root != snaptree.DiskRoot() {
   195  		if err := snaptree.Cap(root, 0); err != nil {
   196  			return err
   197  		}
   198  	}
   199  	// Secondly, flushing the snapshot journal into the disk. All diff
   200  	// layers upon are dropped silently. Eventually the entire snapshot
   201  	// tree is converted into a single disk layer with the pruning target
   202  	// as the root.
   203  	if _, err := snaptree.Journal(root); err != nil {
   204  		return err
   205  	}
   206  	// Delete the state bloom, it marks the entire pruning procedure is
   207  	// finished. If any crashes or manual exit happens before this,
   208  	// `RecoverPruning` will pick it up in the next restarts to redo all
   209  	// the things.
   210  	os.RemoveAll(bloomPath)
   212  	// Start compactions, will remove the deleted data from the disk immediately.
   213  	// Note for small pruning, the compaction is skipped.
   214  	if count >= rangeCompactionThreshold {
   215  		cstart := time.Now()
   216  		for b := 0x00; b <= 0xf0; b += 0x10 {
   217  			var (
   218  				start = []byte{byte(b)}
   219  				end   = []byte{byte(b + 0x10)}
   220  			)
   221  			if b == 0xf0 {
   222  				end = nil
   223  			}
   224  			log.Info("Compacting database", "range", fmt.Sprintf("%#x-%#x", start, end), "elapsed", common.PrettyDuration(time.Since(cstart)))
   225  			if err := maindb.Compact(start, end); err != nil {
   226  				log.Error("Database compaction failed", "error", err)
   227  				return err
   228  			}
   229  		}
   230  		log.Info("Database compaction finished", "elapsed", common.PrettyDuration(time.Since(cstart)))
   231  	}
   232  	log.Info("State pruning successful", "pruned", size, "elapsed", common.PrettyDuration(time.Since(start)))
   233  	return nil
   234  }
   236  // Prune deletes all historical state nodes except the nodes belong to the
   237  // specified state version. If user doesn't specify the state version, use
   238  // the bottom-most snapshot diff layer as the target.
   239  func (p *Pruner) Prune(root common.Hash) error {
   240  	// If the state bloom filter is already committed previously,
   241  	// reuse it for pruning instead of generating a new one. It's
   242  	// mandatory because a part of state may already be deleted,
   243  	// the recovery procedure is necessary.
   244  	_, stateBloomRoot, err := findBloomFilter(p.datadir)
   245  	if err != nil {
   246  		return err
   247  	}
   248  	if stateBloomRoot != (common.Hash{}) {
   249  		return RecoverPruning(p.datadir, p.db, p.trieCachePath, p.triesInMemory)
   250  	}
   251  	// If the target state root is not specified, use the HEAD-(n-1) as the
   252  	// target. The reason for picking it is:
   253  	// - in most of the normal cases, the related state is available
   254  	// - the probability of this layer being reorg is very low
   255  	var layers []snapshot.Snapshot
   256  	if root == (common.Hash{}) {
   257  		// Retrieve all snapshot layers from the current HEAD.
   258  		// In theory there are n difflayers + 1 disk layer present,
   259  		// so n diff layers are expected to be returned.
   260  		layers = p.snaptree.Snapshots(p.headHeader.Root, int(p.triesInMemory), true)
   261  		if len(layers) != int(p.triesInMemory) {
   262  			// Reject if the accumulated diff layers are less than n. It
   263  			// means in most of normal cases, there is no associated state
   264  			// with bottom-most diff layer.
   265  			return fmt.Errorf("snapshot not old enough yet: need %d more blocks", int(p.triesInMemory)-len(layers))
   266  		}
   267  		// Use the bottom-most diff layer as the target
   268  		root = layers[len(layers)-1].Root()
   269  	}
   270  	// Ensure the root is really present. The weak assumption
   271  	// is the presence of root can indicate the presence of the
   272  	// entire trie.
   273  	if blob := rawdb.ReadTrieNode(p.db, root); len(blob) == 0 {
   274  		// The special case is for clique based networks(rinkeby, goerli
   275  		// and some other private networks), it's possible that two
   276  		// consecutive blocks will have same root. In this case snapshot
   277  		// difflayer won't be created. So HEAD-(n-1) may not paired with
   278  		// head-(n-1) layer. Instead the paired layer is higher than the
   279  		// bottom-most diff layer. Try to find the bottom-most snapshot
   280  		// layer with state available.
   281  		//
   282  		// Note HEAD is ignored. Usually there is the associated
   283  		// state available, but we don't want to use the topmost state
   284  		// as the pruning target.
   285  		var found bool
   286  		for i := len(layers) - 2; i >= 1; i-- {
   287  			if blob := rawdb.ReadTrieNode(p.db, layers[i].Root()); len(blob) != 0 {
   288  				root = layers[i].Root()
   289  				found = true
   290  				log.Info("Selecting middle-layer as the pruning target", "root", root, "depth", i)
   291  				break
   292  			}
   293  		}
   294  		if !found {
   295  			if blob := rawdb.ReadTrieNode(p.db, p.snaptree.DiskRoot()); len(blob) != 0 {
   296  				root = p.snaptree.DiskRoot()
   297  				found = true
   298  				log.Info("Selecting disk-layer as the pruning target", "root", root)
   299  			}
   300  		}
   301  		if !found {
   302  			if len(layers) > 0 {
   303  				return errors.New("no snapshot paired state")
   304  			}
   305  			return fmt.Errorf("associated state[%x] is not present", root)
   306  		}
   307  	} else {
   308  		if len(layers) > 0 {
   309  			log.Info("Selecting bottom-most difflayer as the pruning target", "root", root, "height", p.headHeader.Number.Uint64()-127)
   310  		} else {
   311  			log.Info("Selecting user-specified state as the pruning target", "root", root)
   312  		}
   313  	}
   314  	// Before start the pruning, delete the clean trie cache first.
   315  	// It's necessary otherwise in the next restart we will hit the
   316  	// deleted state root in the "clean cache" so that the incomplete
   317  	// state is picked for usage.
   318  	deleteCleanTrieCache(p.trieCachePath)
   320  	// All the state roots of the middle layer should be forcibly pruned,
   321  	// otherwise the dangling state will be left.
   322  	middleRoots := make(map[common.Hash]struct{})
   323  	for _, layer := range layers {
   324  		if layer.Root() == root {
   325  			break
   326  		}
   327  		middleRoots[layer.Root()] = struct{}{}
   328  	}
   329  	// Traverse the target state, re-construct the whole state trie and
   330  	// commit to the given bloom filter.
   331  	start := time.Now()
   332  	if err := snapshot.GenerateTrie(p.snaptree, root, p.db, p.stateBloom); err != nil {
   333  		return err
   334  	}
   335  	// Traverse the genesis, put all genesis state entries into the
   336  	// bloom filter too.
   337  	if err := extractGenesis(p.db, p.stateBloom); err != nil {
   338  		return err
   339  	}
   340  	filterName := bloomFilterName(p.datadir, root)
   342  	log.Info("Writing state bloom to disk", "name", filterName)
   343  	if err := p.stateBloom.Commit(filterName, filterName+stateBloomFileTempSuffix); err != nil {
   344  		return err
   345  	}
   346  	log.Info("State bloom filter committed", "name", filterName)
   347  	return prune(p.snaptree, root, p.db, p.stateBloom, filterName, middleRoots, start)
   348  }
   350  // RecoverPruning will resume the pruning procedure during the system restart.
   351  // This function is used in this case: user tries to prune state data, but the
   352  // system was interrupted midway because of crash or manual-kill. In this case
   353  // if the bloom filter for filtering active state is already constructed, the
   354  // pruning can be resumed. What's more if the bloom filter is constructed, the
   355  // pruning **has to be resumed**. Otherwise a lot of dangling nodes may be left
   356  // in the disk.
   357  func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string, triesInMemory uint64) error {
   358  	stateBloomPath, stateBloomRoot, err := findBloomFilter(datadir)
   359  	if err != nil {
   360  		return err
   361  	}
   362  	if stateBloomPath == "" {
   363  		return nil // nothing to recover
   364  	}
   365  	headBlock := rawdb.ReadHeadBlock(db)
   366  	if headBlock == nil {
   367  		return errors.New("Failed to load head block")
   368  	}
   369  	// Initialize the snapshot tree in recovery mode to handle this special case:
   370  	// - Users run the `prune-state` command multiple times
   371  	// - Neither these `prune-state` running is finished(e.g. interrupted manually)
   372  	// - The state bloom filter is already generated, a part of state is deleted,
   373  	//   so that resuming the pruning here is mandatory
   374  	// - The state HEAD is rewound already because of multiple incomplete `prune-state`
   375  	// In this case, even the state HEAD is not exactly matched with snapshot, it
   376  	// still feasible to recover the pruning correctly.
   377  	snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, int(triesInMemory), headBlock.Root(), false, false, true)
   378  	if err != nil {
   379  		return err // The relevant snapshot(s) might not exist
   380  	}
   381  	stateBloom, err := NewStateBloomFromDisk(stateBloomPath)
   382  	if err != nil {
   383  		return err
   384  	}
   385  	log.Info("Loaded state bloom filter", "path", stateBloomPath)
   387  	// Before start the pruning, delete the clean trie cache first.
   388  	// It's necessary otherwise in the next restart we will hit the
   389  	// deleted state root in the "clean cache" so that the incomplete
   390  	// state is picked for usage.
   391  	deleteCleanTrieCache(trieCachePath)
   393  	// All the state roots of the middle layers should be forcibly pruned,
   394  	// otherwise the dangling state will be left.
   395  	var (
   396  		found       bool
   397  		layers      = snaptree.Snapshots(headBlock.Root(), int(triesInMemory), true)
   398  		middleRoots = make(map[common.Hash]struct{})
   399  	)
   400  	for _, layer := range layers {
   401  		if layer.Root() == stateBloomRoot {
   402  			found = true
   403  			break
   404  		}
   405  		middleRoots[layer.Root()] = struct{}{}
   406  	}
   407  	if !found {
   408  		log.Error("Pruning target state is not existent")
   409  		return errors.New("non-existent target state")
   410  	}
   411  	return prune(snaptree, stateBloomRoot, db, stateBloom, stateBloomPath, middleRoots, time.Now())
   412  }
   414  // extractGenesis loads the genesis state and commits all the state entries
   415  // into the given bloomfilter.
   416  func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
   417  	genesisHash := rawdb.ReadCanonicalHash(db, 0)
   418  	if genesisHash == (common.Hash{}) {
   419  		return errors.New("missing genesis hash")
   420  	}
   421  	genesis := rawdb.ReadBlock(db, genesisHash, 0)
   422  	if genesis == nil {
   423  		return errors.New("missing genesis block")
   424  	}
   425  	t, err := trie.NewSecure(genesis.Root(), trie.NewDatabase(db))
   426  	if err != nil {
   427  		return err
   428  	}
   429  	accIter := t.NodeIterator(nil)
   430  	for accIter.Next(true) {
   431  		hash := accIter.Hash()
   433  		// Embedded nodes don't have hash.
   434  		if hash != (common.Hash{}) {
   435  			stateBloom.Put(hash.Bytes(), nil)
   436  		}
   437  		// If it's a leaf node, yes we are touching an account,
   438  		// dig into the storage trie further.
   439  		if accIter.Leaf() {
   440  			var acc state.Account
   441  			if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil {
   442  				return err
   443  			}
   444  			if acc.Root != emptyRoot {
   445  				storageTrie, err := trie.NewSecure(acc.Root, trie.NewDatabase(db))
   446  				if err != nil {
   447  					return err
   448  				}
   449  				storageIter := storageTrie.NodeIterator(nil)
   450  				for storageIter.Next(true) {
   451  					hash := storageIter.Hash()
   452  					if hash != (common.Hash{}) {
   453  						stateBloom.Put(hash.Bytes(), nil)
   454  					}
   455  				}
   456  				if storageIter.Error() != nil {
   457  					return storageIter.Error()
   458  				}
   459  			}
   460  			if !bytes.Equal(acc.CodeHash, emptyCode) {
   461  				stateBloom.Put(acc.CodeHash, nil)
   462  			}
   463  		}
   464  	}
   465  	return accIter.Error()
   466  }
   468  func bloomFilterName(datadir string, hash common.Hash) string {
   469  	return filepath.Join(datadir, fmt.Sprintf("%s.%s.%s", stateBloomFilePrefix, hash.Hex(), stateBloomFileSuffix))
   470  }
   472  func isBloomFilter(filename string) (bool, common.Hash) {
   473  	filename = filepath.Base(filename)
   474  	if strings.HasPrefix(filename, stateBloomFilePrefix) && strings.HasSuffix(filename, stateBloomFileSuffix) {
   475  		return true, common.HexToHash(filename[len(stateBloomFilePrefix)+1 : len(filename)-len(stateBloomFileSuffix)-1])
   476  	}
   477  	return false, common.Hash{}
   478  }
   480  func findBloomFilter(datadir string) (string, common.Hash, error) {
   481  	var (
   482  		stateBloomPath string
   483  		stateBloomRoot common.Hash
   484  	)
   485  	if err := filepath.Walk(datadir, func(path string, info os.FileInfo, err error) error {
   486  		if info != nil && !info.IsDir() {
   487  			ok, root := isBloomFilter(path)
   488  			if ok {
   489  				stateBloomPath = path
   490  				stateBloomRoot = root
   491  			}
   492  		}
   493  		return nil
   494  	}); err != nil {
   495  		return "", common.Hash{}, err
   496  	}
   497  	return stateBloomPath, stateBloomRoot, nil
   498  }
// warningLog is the message logged by deleteCleanTrieCache when the clean
// trie cache is not found at the given path.
const warningLog = `
WARNING!
The clean trie cache is not found. Please delete it by yourself after the 
pruning. Remember don't start the Geth without deleting the clean trie cache
otherwise the entire database may be damaged!
Check the command description "geth snapshot prune-state --help" for more details.
`
   511  func deleteCleanTrieCache(path string) {
   512  	if _, err := os.Stat(path); os.IsNotExist(err) {
   513  		log.Warn(warningLog)
   514  		return
   515  	}
   516  	os.RemoveAll(path)
   517  	log.Info("Deleted trie clean cache", "path", path)
   518  }