github.com/tacshi/go-ethereum@v0.0.0-20230616113857-84a434e20921/core/state/pruner/pruner.go

// Copyright 2021 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package pruner

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"math"
	"os"
	"path/filepath"
	"runtime"
	"sync"
	"time"

	"github.com/tacshi/go-ethereum/common"
	"github.com/tacshi/go-ethereum/core/rawdb"
	"github.com/tacshi/go-ethereum/core/state"
	"github.com/tacshi/go-ethereum/core/state/snapshot"
	"github.com/tacshi/go-ethereum/core/types"
	"github.com/tacshi/go-ethereum/ethdb"
	"github.com/tacshi/go-ethereum/log"
	"github.com/tacshi/go-ethereum/params"
	"github.com/tacshi/go-ethereum/rlp"
	"github.com/tacshi/go-ethereum/trie"
)

const (
	// stateBloomFileName is the filename of the state bloom filter.
	stateBloomFileName = "statebloom.bf.gz"

	// stateBloomFileTempSuffix is the filename suffix the state bloom filter
	// carries while being written out, used to detect aborted writes.
	stateBloomFileTempSuffix = ".tmp"

	// rangeCompactionThreshold is the minimal number of deleted entries
	// required to trigger a range compaction. It's a fairly arbitrary number,
	// chosen only to avoid triggering range compaction after small deletions.
	rangeCompactionThreshold = 100000
)

// Config includes all the configurations for pruning.
type Config struct {
	Datadir   string // The directory of the state database
	Cachedir  string // The directory of the state clean cache
	BloomSize uint64 // The memory in megabytes allocated to the bloom filter
}

// Pruner is an offline tool to prune stale state with the help of the
// snapshot. The workflow of the pruner is very simple:
//
//   - iterate the snapshot, reconstruct the relevant state
//   - iterate the database, delete all other state entries which
//     don't belong to the target state or the genesis state
//
// It can take several hours (around 2 hours for mainnet) to finish
// the whole pruning work. It's recommended to run this offline tool
// periodically in order to release disk space and improve disk read
// performance to some extent.
type Pruner struct {
	config      Config
	chainHeader *types.Header
	db          ethdb.Database
	stateBloom  *stateBloom
	snaptree    *snapshot.Tree
}

// NewPruner creates the pruner instance.
func NewPruner(db ethdb.Database, config Config) (*Pruner, error) {
	headBlock := rawdb.ReadHeadBlock(db)
	if headBlock == nil {
		return nil, errors.New("failed to load head block")
	}
	snapconfig := snapshot.Config{
		CacheSize:  256,
		Recovery:   false,
		NoBuild:    true,
		AsyncBuild: false,
	}
	snaptree, err := snapshot.New(snapconfig, db, trie.NewDatabase(db), headBlock.Root())
	if err != nil {
		return nil, err // The relevant snapshot(s) might not exist
	}
	// Sanitize the bloom filter size if it's too small.
	if config.BloomSize < 256 {
		log.Warn("Sanitizing bloomfilter size", "provided(MB)", config.BloomSize, "updated(MB)", 256)
		config.BloomSize = 256
	}
	stateBloom, err := newStateBloomWithSize(config.BloomSize)
	if err != nil {
		return nil, err
	}
	return &Pruner{
		config:      config,
		chainHeader: headBlock.Header(),
		db:          db,
		stateBloom:  stateBloom,
		snaptree:    snaptree,
	}, nil
}
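
// pruneOnce is a minimal usage sketch, not part of the original file: it
// shows how NewPruner and Prune are typically wired together, pruning to the
// bottom-most snapshot layer by passing the zero hash as the target root.
// The caller is assumed to supply an already-open ethdb.Database plus the
// node's data and clean-cache directories; the 2048 MB bloom size is an
// illustrative choice (anything below 256 MB is raised to 256 MB).
func pruneOnce(db ethdb.Database, datadir, cachedir string) error {
	pruner, err := NewPruner(db, Config{
		Datadir:   datadir,
		Cachedir:  cachedir,
		BloomSize: 2048,
	})
	if err != nil {
		return err
	}
	// A zero root selects the bottom-most snapshot diff layer as the target.
	return pruner.Prune([]common.Hash{{}})
}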

func readStoredChainConfig(db ethdb.Database) *params.ChainConfig {
	block0Hash := rawdb.ReadCanonicalHash(db, 0)
	if block0Hash == (common.Hash{}) {
		return nil
	}
	return rawdb.ReadChainConfig(db, block0Hash)
}

func removeOtherRoots(db ethdb.Database, rootsList []common.Hash, stateBloom *stateBloom) error {
	chainConfig := readStoredChainConfig(db)
	var genesisBlockNum uint64
	if chainConfig != nil {
		genesisBlockNum = chainConfig.ArbitrumChainParams.GenesisBlockNum
	}
	roots := make(map[common.Hash]struct{})
	for _, root := range rootsList {
		roots[root] = struct{}{}
	}
	headBlock := rawdb.ReadHeadBlock(db)
	if headBlock == nil {
		return errors.New("failed to load head block")
	}
	blockRange := headBlock.NumberU64() - genesisBlockNum
	threads := runtime.NumCPU()
	var wg sync.WaitGroup
	errCh := make(chan error, threads)
	for thread := 0; thread < threads; thread++ {
		thread := thread
		wg.Add(1)
		go func() {
			defer wg.Done()
			firstBlockNum := blockRange/uint64(threads)*uint64(thread+1) + genesisBlockNum
			if thread == threads-1 {
				firstBlockNum = headBlock.NumberU64()
			}
			endBlockNum := blockRange/uint64(threads)*uint64(thread) + genesisBlockNum
			if thread != 0 {
				// endBlockNum is the last block that will be checked
				endBlockNum++
			}
			startedAt := time.Now()
			lastLog := time.Now()
			firstBlockHash := rawdb.ReadCanonicalHash(db, firstBlockNum)
			block := rawdb.ReadBlock(db, firstBlockHash, firstBlockNum)
			for {
				if block == nil || block.Root() == (common.Hash{}) {
					return
				}
				bloomContains, err := stateBloom.Contain(block.Root().Bytes())
				if err != nil {
					errCh <- err
					return
				}
				if bloomContains {
					_, rootsContains := roots[block.Root()]
					if !rootsContains {
						log.Info(
							"Found false positive state root bloom filter match",
							"blockNum", block.Number(),
							"blockHash", block.Hash(),
							"stateRoot", block.Root(),
						)
						// This state root is a false positive of the bloom filter
						err = db.Delete(block.Root().Bytes())
						if err != nil {
							errCh <- err
							return
						}
					}
				}
				if block.NumberU64() <= endBlockNum {
					return
				}
				if thread == threads-1 && time.Since(lastLog) >= time.Second*30 {
					lastLog = time.Now()
					elapsed := time.Since(startedAt)
					totalWork := float32(firstBlockNum - endBlockNum)
					completedBlocks := float32(block.NumberU64() - endBlockNum)
					log.Info("Removing old state roots", "elapsed", elapsed, "eta", time.Duration(float32(elapsed)*(totalWork/completedBlocks))-elapsed)
				}
				block = rawdb.ReadBlock(db, block.ParentHash(), block.NumberU64()-1)
			}
		}()
	}
	wg.Wait()
	select {
	case err := <-errCh:
		return err
	default:
		log.Info("Done removing old state roots")
		return nil
	}
}
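
// threadRange is an illustrative helper, not used by the pruner itself: it
// restates the per-thread block partitioning computed inline above in
// removeOtherRoots. Thread t walks the chain backwards from first down to
// and including end; the last thread absorbs the rounding remainder by
// starting at the head block. All names here are hypothetical.
func threadRange(genesisBlockNum, headBlockNum uint64, threads, thread int) (first, end uint64) {
	blockRange := headBlockNum - genesisBlockNum
	first = blockRange/uint64(threads)*uint64(thread+1) + genesisBlockNum
	if thread == threads-1 {
		first = headBlockNum
	}
	end = blockRange/uint64(threads)*uint64(thread) + genesisBlockNum
	if thread != 0 {
		// end is inclusive, so step past the previous thread's first block
		end++
	}
	return first, end
}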

// Arbitrum: snaptree and the roots are for the final snapshot kept
func prune(snaptree *snapshot.Tree, allRoots []common.Hash, maindb ethdb.Database, stateBloom *stateBloom, bloomPath string, start time.Time) error {
	// Delete all stale trie nodes on disk. With the help of the state bloom,
	// the trie nodes (and codes) belonging to the active state will be
	// filtered out. A very small portion of stale tries will also survive
	// because of the bloom filter's false-positive rate, but the assumption
	// here is that the rate is low enough (~0.05%). The probability that a
	// dangling node is itself a state root is extremely low, so in theory
	// the dangling nodes will never be visited again.
	var (
		count  int
		size   common.StorageSize
		pstart = time.Now()
		logged = time.Now()
		batch  = maindb.NewBatch()
		iter   = maindb.NewIterator(nil, nil)
	)
	log.Info("Loaded state bloom filter", "sizeMB", stateBloom.Size()/(1024*1024), "falsePositiveProbability", stateBloom.FalsePosititveProbability())
	for iter.Next() {
		key := iter.Key()

		// All state entries that don't belong to the target states or the
		// genesis state are deleted here:
		// - trie node
		// - legacy contract code
		// - new-scheme contract code
		isCode, codeKey := rawdb.IsCodeKey(key)
		if len(key) == common.HashLength || isCode {
			checkKey := key
			if isCode {
				checkKey = codeKey
			}
			if ok, err := stateBloom.Contain(checkKey); err != nil {
				return err
			} else if ok {
				continue
			}
			count += 1
			size += common.StorageSize(len(key) + len(iter.Value()))
			batch.Delete(key)

			var eta time.Duration // Realistically this will never remain uninitialized
			if done := binary.BigEndian.Uint64(key[:8]); done > 0 {
				var (
					left  = math.MaxUint64 - binary.BigEndian.Uint64(key[:8])
					speed = done/uint64(time.Since(pstart)/time.Millisecond+1) + 1 // +1 to avoid division by zero
				)
				eta = time.Duration(left/speed) * time.Millisecond
			}
			if time.Since(logged) > 8*time.Second {
				log.Info("Pruning state data", "nodes", count, "size", size,
					"elapsed", common.PrettyDuration(time.Since(pstart)), "eta", common.PrettyDuration(eta))
				logged = time.Now()
			}
			// Recreate the iterator after every batch commit in order
			// to allow the underlying compactor to delete the entries.
			if batch.ValueSize() >= ethdb.IdealBatchSize {
				batch.Write()
				batch.Reset()

				iter.Release()
				iter = maindb.NewIterator(nil, key)
			}
		}
	}
	if batch.ValueSize() > 0 {
		batch.Write()
		batch.Reset()
	}
	iter.Release()
	log.Info("Pruned state data", "nodes", count, "size", size, "elapsed", common.PrettyDuration(time.Since(pstart)))

	var snapRoot common.Hash
	if len(allRoots) > 0 {
		snapRoot = allRoots[len(allRoots)-1]
	}
	if snapRoot != (common.Hash{}) && snaptree.Snapshot(snapRoot) != nil {
		// Pruning is done, now drop the "useless" layers from the snapshot.
		// First, flush the target layer to disk. After that, all the diff
		// layers below the target will be merged into the disk layer.
		if err := snaptree.Cap(snapRoot, 0); err != nil {
			return err
		}
		// Second, flush the snapshot journal to disk. All diff layers above
		// the target are dropped silently. Eventually the entire snapshot
		// tree is converted into a single disk layer with the pruning target
		// as the root.
		if _, err := snaptree.Journal(snapRoot); err != nil {
			return err
		}
	}

	// Clean up any false positives that are top-level state roots.
	err := removeOtherRoots(maindb, allRoots, stateBloom)
	if err != nil {
		return err
	}

	// Delete the state bloom; this marks the entire pruning procedure as
	// finished. If any crash or manual exit happens before this point,
	// `RecoverPruning` will pick it up on the next restart and redo
	// everything.
	os.RemoveAll(bloomPath)

	// Start compaction, which will remove the deleted data from disk
	// immediately. Note that for small prunes the compaction is skipped.
	if count >= rangeCompactionThreshold {
		cstart := time.Now()
		for b := 0x00; b <= 0xf0; b += 0x10 {
			var (
				start = []byte{byte(b)}
				end   = []byte{byte(b + 0x10)}
			)
			if b == 0xf0 {
				end = nil
			}
			log.Info("Compacting database", "range", fmt.Sprintf("%#x-%#x", start, end), "elapsed", common.PrettyDuration(time.Since(cstart)))
			if err := maindb.Compact(start, end); err != nil {
				log.Error("Database compaction failed", "error", err)
				return err
			}
		}
		log.Info("Database compaction finished", "elapsed", common.PrettyDuration(time.Since(cstart)))
	}
	log.Info("State pruning successful", "pruned", size, "elapsed", common.PrettyDuration(time.Since(start)))
	return nil
}
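
// shouldPrune is an illustrative condensation, not called by prune itself,
// of the per-key decision made in the deletion loop above: only 32-byte
// trie-node keys and contract-code keys are candidates, and a bloom hit
// keeps the entry alive. The function name is hypothetical.
func shouldPrune(stateBloom *stateBloom, key []byte) (bool, error) {
	isCode, codeKey := rawdb.IsCodeKey(key)
	if len(key) != common.HashLength && !isCode {
		return false, nil // not a state entry, leave it untouched
	}
	checkKey := key
	if isCode {
		checkKey = codeKey
	}
	ok, err := stateBloom.Contain(checkKey)
	if err != nil {
		return false, err
	}
	// Prune only when the bloom filter does not contain the key.
	return !ok, nil
}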

// We assume state blooms do not need the value, only the key
func dumpRawTrieDescendants(db ethdb.Database, root common.Hash, output *stateBloom) error {
	sdb := state.NewDatabase(db)
	tr, err := sdb.OpenTrie(root)
	if err != nil {
		return err
	}
	accountIt := tr.NodeIterator(nil)
	startedAt := time.Now()
	lastLog := time.Now()

	// We dump the storage of different accounts in parallel, but we want to limit this parallelism.
	// To do so, we create a semaphore out of a channel's buffer.
	// Before launching a new goroutine, we acquire the semaphore by taking an entry from this channel.
	// This channel doubles as a mechanism for the background goroutine to report an error on release.
	threads := runtime.NumCPU()
	results := make(chan error, threads)
	for i := 0; i < threads; i++ {
		results <- nil
	}

	for accountIt.Next(true) {
		accountTrieHash := accountIt.Hash()
		// If the iterator hash is the empty hash, this is an embedded node
		if accountTrieHash != (common.Hash{}) {
			err = output.Put(accountTrieHash.Bytes(), nil)
			if err != nil {
				return err
			}
		}
		if accountIt.Leaf() {
			keyBytes := accountIt.LeafKey()
			if len(keyBytes) != len(common.Hash{}) {
				return fmt.Errorf("unexpected db key length %v", len(keyBytes))
			}
			key := common.BytesToHash(keyBytes)
			if time.Since(lastLog) >= time.Second*30 {
				lastLog = time.Now()
				progress := binary.BigEndian.Uint16(key.Bytes()[:2])
				elapsed := time.Since(startedAt)
				log.Info("Traversing trie database", "key", key, "elapsed", elapsed, "eta", time.Duration(float32(elapsed)*(256*256/float32(progress)))-elapsed)
			}
			var data types.StateAccount
			if err := rlp.DecodeBytes(accountIt.LeafBlob(), &data); err != nil {
				return fmt.Errorf("failed to decode account data: %w", err)
			}
			if !bytes.Equal(data.CodeHash, types.EmptyCodeHash[:]) {
				if err := output.Put(data.CodeHash, nil); err != nil {
					return err
				}
			}
			if data.Root != (common.Hash{}) {
				storageTr, err := sdb.OpenStorageTrie(key, common.BytesToHash(accountIt.LeafKey()), data.Root)
				if err != nil {
					return err
				}
				err = <-results
				if err != nil {
					return err
				}
				go func() {
					var err error
					defer func() {
						results <- err
					}()
					storageIt := storageTr.NodeIterator(nil)
					for storageIt.Next(true) {
						storageTrieHash := storageIt.Hash()
						if storageTrieHash != (common.Hash{}) {
							// The inner bloomfilter library has a mutex so concurrency is fine here
							err = output.Put(storageTrieHash.Bytes(), nil)
							if err != nil {
								return
							}
						}
					}
					err = storageIt.Error()
				}()
			}
		}
	}
	if accountIt.Error() != nil {
		return accountIt.Error()
	}
	for i := 0; i < threads; i++ {
		err = <-results
		if err != nil {
			return err
		}
	}
	return nil
}
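
// runLimited is a minimal standalone sketch, with hypothetical names, of the
// channel-as-semaphore pattern used in dumpRawTrieDescendants above: the
// channel's buffer bounds the number of concurrent workers, and each slot
// doubles as the error-return path for the worker that releases it.
func runLimited(jobs []func() error, limit int) error {
	results := make(chan error, limit)
	for i := 0; i < limit; i++ {
		results <- nil // fill the semaphore with "no error" tokens
	}
	var firstErr error
	for _, job := range jobs {
		// Acquire a token; it carries the error of a finished worker, if any.
		if err := <-results; err != nil && firstErr == nil {
			firstErr = err
		}
		job := job
		go func() {
			results <- job() // release the token, reporting this job's error
		}()
	}
	// Drain every token so all workers have finished before returning.
	for i := 0; i < limit; i++ {
		if err := <-results; err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}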

// Prune deletes all historical state nodes except those belonging to the
// specified state versions. If the user doesn't specify a state version, the
// bottom-most snapshot diff layer is used as the target.
func (p *Pruner) Prune(inputRoots []common.Hash) error {
	// If a state bloom filter was already committed previously, reuse it for
	// pruning instead of generating a new one. This is mandatory because
	// part of the state may already have been deleted, making the recovery
	// procedure necessary.
	bloomExists, err := bloomFilterExists(p.config.Datadir)
	if err != nil {
		return err
	}
	if bloomExists {
		return RecoverPruning(p.config.Datadir, p.db, p.config.Cachedir)
	}
	// Retrieve all snapshot layers from the current HEAD.
	// In theory there are 128 difflayers + 1 disk layer present,
	// so 128 diff layers are expected to be returned.
	layers := p.snaptree.Snapshots(p.chainHeader.Root, -1, true)
	var roots []common.Hash // replaces zero roots with snapshot roots
	for _, root := range inputRoots {
		snapshotTarget := root == common.Hash{}
		if snapshotTarget {
			if len(layers) == 0 {
				log.Warn("No snapshot exists as pruning target")
				continue
			}
			// Use the bottom-most diff layer as the target
			root = layers[len(layers)-1].Root()
		}
		// Ensure the root is really present. The weak assumption is that the
		// presence of the root indicates the presence of the entire trie.
		if !rawdb.HasLegacyTrieNode(p.db, root) {
			if !snapshotTarget {
				return fmt.Errorf("associated state[%x] is not present", root)
			}
			// The special case is for clique-based networks (rinkeby, goerli
			// and some other private networks), where two consecutive blocks
			// may have the same root. In that case no snapshot difflayer is
			// created, so the HEAD-127 state may not be paired with the
			// bottom-most diff layer; its paired layer sits higher in the
			// tree instead. Try to find the bottom-most snapshot layer with
			// state available.
			var found bool
			for i := len(layers) - 2; i >= 0; i-- {
				if rawdb.HasLegacyTrieNode(p.db, layers[i].Root()) {
					root = layers[i].Root()
					found = true
					log.Info("Selecting middle-layer as the pruning target", "root", root, "depth", i)
					break
				}
			}
			if !found {
				return errors.New("no snapshot paired state")
			}
		} else {
			if len(layers) > 0 {
				log.Info("Selecting bottom-most difflayer as the pruning target", "root", root, "height", p.chainHeader.Number.Uint64()-127)
			} else {
				log.Info("Selecting user-specified state as the pruning target", "root", root)
			}
		}
		roots = append(roots, root)
	}
	if len(roots) == 0 {
		return errors.New("no pruning target roots found")
	}
	// Before starting the pruning, delete the clean trie cache first. This
	// is necessary because otherwise, on the next restart, we would hit the
	// deleted state root in the "clean cache" and the incomplete state would
	// be picked up for use.
	deleteCleanTrieCache(p.config.Cachedir)

	// Traverse the target states, re-construct the whole state tries and
	// commit them to the given bloom filter.
	start := time.Now()
	for _, root := range roots {
		log.Info("Building bloom filter for pruning", "root", root)
		if p.snaptree.Snapshot(root) != nil {
			if err := snapshot.GenerateTrie(p.snaptree, root, p.db, p.stateBloom); err != nil {
				return err
			}
		} else {
			if err := dumpRawTrieDescendants(p.db, root, p.stateBloom); err != nil {
				return err
			}
		}
	}
	// Traverse the genesis state and put all its state entries into the
	// bloom filter too.
	if err := extractGenesis(p.db, p.stateBloom); err != nil {
		return err
	}

	filterName := bloomFilterPath(p.config.Datadir)

	log.Info("Writing state bloom to disk", "name", filterName, "roots", roots)
	if err := p.stateBloom.Commit(filterName, filterName+stateBloomFileTempSuffix, roots); err != nil {
		return err
	}
	log.Info("State bloom filter committed", "name", filterName, "roots", roots)
	return prune(p.snaptree, roots, p.db, p.stateBloom, filterName, start)
}

// RecoverPruning will resume the pruning procedure during system restart.
// It covers the case where the user tries to prune state data, but the
// system is interrupted midway by a crash or a manual kill. If the bloom
// filter for filtering active state has already been constructed, the
// pruning can be resumed. Moreover, if the bloom filter is constructed, the
// pruning **has to be resumed**; otherwise a lot of dangling nodes may be
// left on disk.
func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) error {
	exists, err := bloomFilterExists(datadir)
	if err != nil {
		return err
	}
	if !exists {
		return nil // nothing to recover
	}
	headBlock := rawdb.ReadHeadBlock(db)
	if headBlock == nil {
		return errors.New("failed to load head block")
	}
	// Initialize the snapshot tree in recovery mode to handle this special case:
	// - Users run the `prune-state` command multiple times
	// - None of these `prune-state` runs finished (e.g. interrupted manually)
	// - The state bloom filter is already generated and a part of the state
	//   has been deleted, so resuming the pruning here is mandatory
	// - The state HEAD has already been rewound because of the multiple
	//   incomplete `prune-state` runs
	// In this case, even if the state HEAD doesn't exactly match the snapshot,
	// it is still feasible to recover the pruning correctly.
	snapconfig := snapshot.Config{
		CacheSize:  256,
		Recovery:   true,
		NoBuild:    true,
		AsyncBuild: false,
	}
	snaptree, err := snapshot.New(snapconfig, db, trie.NewDatabase(db), headBlock.Root())
	if err != nil {
		return err // The relevant snapshot(s) might not exist
	}
	stateBloomPath := bloomFilterPath(datadir)
	stateBloom, stateBloomRoots, err := NewStateBloomFromDisk(stateBloomPath)
	if err != nil {
		return err
	}
	log.Info("Loaded state bloom filter", "path", stateBloomPath, "roots", stateBloomRoots)

	// Before starting the pruning, delete the clean trie cache first. This
	// is necessary because otherwise, on the next restart, we would hit the
	// deleted state root in the "clean cache" and the incomplete state would
	// be picked up for use.
	deleteCleanTrieCache(trieCachePath)

	return prune(snaptree, stateBloomRoots, db, stateBloom, stateBloomPath, time.Now())
}

// extractGenesis loads the genesis state and commits all its state entries
// into the given bloom filter.
func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
	genesisHash := rawdb.ReadCanonicalHash(db, 0)
	if genesisHash == (common.Hash{}) {
		return errors.New("missing genesis hash")
	}
	genesis := rawdb.ReadBlock(db, genesisHash, 0)
	if genesis == nil {
		return errors.New("missing genesis block")
	}

	return dumpRawTrieDescendants(db, genesis.Root(), stateBloom)
}

func bloomFilterPath(datadir string) string {
	return filepath.Join(datadir, stateBloomFileName)
}

func bloomFilterExists(datadir string) (bool, error) {
	_, err := os.Stat(bloomFilterPath(datadir))
	if errors.Is(err, os.ErrNotExist) {
		return false, nil
	} else if err != nil {
		return false, err
	} else {
		return true, nil
	}
}
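
// inspectPendingPrune is an illustrative helper, not part of the original
// file: it reports whether an interrupted prune is pending and, if so, which
// target roots the committed bloom filter was built for, using the same
// NewStateBloomFromDisk loader that RecoverPruning relies on. The function
// name is hypothetical.
func inspectPendingPrune(datadir string) ([]common.Hash, bool, error) {
	exists, err := bloomFilterExists(datadir)
	if err != nil || !exists {
		return nil, false, err
	}
	// The bloom file only exists between Commit and the final cleanup in
	// prune, so its presence means pruning must be resumed.
	_, roots, err := NewStateBloomFromDisk(bloomFilterPath(datadir))
	if err != nil {
		return nil, false, err
	}
	return roots, true, nil
}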

const warningLog = `

WARNING!

The clean trie cache was not found. Please delete it yourself after the
pruning. Remember: do not start Geth without deleting the clean trie cache,
otherwise the entire database may be damaged!

Check the command description "geth snapshot prune-state --help" for more details.
`

func deleteCleanTrieCache(path string) {
	if !common.FileExist(path) {
		log.Warn(warningLog)
		return
	}
	os.RemoveAll(path)
	log.Info("Deleted trie clean cache", "path", path)
}