github.1485827954.workers.dev/ethereum/go-ethereum@v1.14.3/triedb/pathdb/database.go (about)

     1  // Copyright 2022 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package pathdb
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/ethereum/go-ethereum/common"
    27  	"github.com/ethereum/go-ethereum/core/rawdb"
    28  	"github.com/ethereum/go-ethereum/core/types"
    29  	"github.com/ethereum/go-ethereum/crypto"
    30  	"github.com/ethereum/go-ethereum/ethdb"
    31  	"github.com/ethereum/go-ethereum/log"
    32  	"github.com/ethereum/go-ethereum/params"
    33  	"github.com/ethereum/go-ethereum/trie/trienode"
    34  	"github.com/ethereum/go-ethereum/trie/triestate"
    35  )
    36  
    37  const (
    38  	// defaultCleanSize is the default memory allowance of clean cache.
    39  	defaultCleanSize = 16 * 1024 * 1024
    40  
    41  	// maxBufferSize is the maximum memory allowance of node buffer.
    42  	// Too large nodebuffer will cause the system to pause for a long
    43  	// time when write happens. Also, the largest batch that pebble can
    44  	// support is 4GB, node will panic if batch size exceeds this limit.
    45  	maxBufferSize = 256 * 1024 * 1024
    46  
    47  	// DefaultBufferSize is the default memory allowance of node buffer
    48  	// that aggregates the writes from above until it's flushed into the
    49  	// disk. It's meant to be used once the initial sync is finished.
    50  	// Do not increase the buffer size arbitrarily, otherwise the system
    51  	// pause time will increase when the database writes happen.
    52  	DefaultBufferSize = 64 * 1024 * 1024
    53  )
    54  
    55  var (
    56  	// maxDiffLayers is the maximum diff layers allowed in the layer tree.
    57  	maxDiffLayers = 128
    58  )
    59  
    60  // layer is the interface implemented by all state layers which includes some
    61  // public methods and some additional methods for internal usage.
    62  type layer interface {
    63  	// node retrieves the trie node with the node info. An error will be returned
    64  	// if the read operation exits abnormally. Specifically, if the layer is
    65  	// already stale.
    66  	//
    67  	// Note, no error will be returned if the requested node is not found in database.
    68  	node(owner common.Hash, path []byte, depth int) ([]byte, common.Hash, *nodeLoc, error)
    69  
    70  	// rootHash returns the root hash for which this layer was made.
    71  	rootHash() common.Hash
    72  
    73  	// stateID returns the associated state id of layer.
    74  	stateID() uint64
    75  
    76  	// parentLayer returns the subsequent layer of it, or nil if the disk was reached.
    77  	parentLayer() layer
    78  
    79  	// update creates a new layer on top of the existing layer diff tree with
    80  	// the provided dirty trie nodes along with the state change set.
    81  	//
    82  	// Note, the maps are retained by the method to avoid copying everything.
    83  	update(root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer
    84  
    85  	// journal commits an entire diff hierarchy to disk into a single journal entry.
    86  	// This is meant to be used during shutdown to persist the layer without
    87  	// flattening everything down (bad for reorgs).
    88  	journal(w io.Writer) error
    89  }
    90  
    91  // Config contains the settings for database.
    92  type Config struct {
    93  	StateHistory   uint64 // Number of recent blocks to maintain state history for
    94  	CleanCacheSize int    // Maximum memory allowance (in bytes) for caching clean nodes
    95  	DirtyCacheSize int    // Maximum memory allowance (in bytes) for caching dirty nodes
    96  	ReadOnly       bool   // Flag whether the database is opened in read only mode.
    97  }
    98  
    99  // sanitize checks the provided user configurations and changes anything that's
   100  // unreasonable or unworkable.
   101  func (c *Config) sanitize() *Config {
   102  	conf := *c
   103  	if conf.DirtyCacheSize > maxBufferSize {
   104  		log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.DirtyCacheSize), "updated", common.StorageSize(maxBufferSize))
   105  		conf.DirtyCacheSize = maxBufferSize
   106  	}
   107  	return &conf
   108  }
   109  
   110  // Defaults contains default settings for Ethereum mainnet.
   111  var Defaults = &Config{
   112  	StateHistory:   params.FullImmutabilityThreshold,
   113  	CleanCacheSize: defaultCleanSize,
   114  	DirtyCacheSize: DefaultBufferSize,
   115  }
   116  
   117  // ReadOnly is the config in order to open database in read only mode.
   118  var ReadOnly = &Config{ReadOnly: true}
   119  
   120  // Database is a multiple-layered structure for maintaining in-memory trie nodes.
   121  // It consists of one persistent base layer backed by a key-value store, on top
   122  // of which arbitrarily many in-memory diff layers are stacked. The memory diffs
   123  // can form a tree with branching, but the disk layer is singleton and common to
   124  // all. If a reorg goes deeper than the disk layer, a batch of reverse diffs can
   125  // be applied to rollback. The deepest reorg that can be handled depends on the
   126  // amount of state histories tracked in the disk.
   127  //
   128  // At most one readable and writable database can be opened at the same time in
   129  // the whole system which ensures that only one database writer can operate disk
   130  // state. Unexpected open operations can cause the system to panic.
   131  type Database struct {
   132  	// readOnly is the flag whether the mutation is allowed to be applied.
   133  	// It will be set automatically when the database is journaled during
   134  	// the shutdown to reject all following unexpected mutations.
   135  	readOnly   bool                         // Flag if database is opened in read only mode
   136  	waitSync   bool                         // Flag if database is deactivated due to initial state sync
   137  	isVerkle   bool                         // Flag if database is used for verkle tree
   138  	bufferSize int                          // Memory allowance (in bytes) for caching dirty nodes
   139  	config     *Config                      // Configuration for database
   140  	diskdb     ethdb.Database               // Persistent storage for matured trie nodes
   141  	tree       *layerTree                   // The group for all known layers
   142  	freezer    ethdb.ResettableAncientStore // Freezer for storing trie histories, nil possible in tests
   143  	lock       sync.RWMutex                 // Lock to prevent mutations from happening at the same time
   144  }
   145  
   146  // New attempts to load an already existing layer from a persistent key-value
   147  // store (with a number of memory layers from a journal). If the journal is not
   148  // matched with the base persistent layer, all the recorded diff layers are discarded.
   149  func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database {
   150  	if config == nil {
   151  		config = Defaults
   152  	}
   153  	config = config.sanitize()
   154  
   155  	db := &Database{
   156  		readOnly:   config.ReadOnly,
   157  		isVerkle:   isVerkle,
   158  		bufferSize: config.DirtyCacheSize,
   159  		config:     config,
   160  		diskdb:     diskdb,
   161  	}
   162  	// Construct the layer tree by resolving the in-disk singleton state
   163  	// and in-memory layer journal.
   164  	db.tree = newLayerTree(db.loadLayers())
   165  
   166  	// Repair the state history, which might not be aligned with the state
   167  	// in the key-value store due to an unclean shutdown.
   168  	if err := db.repairHistory(); err != nil {
   169  		log.Crit("Failed to repair pathdb", "err", err)
   170  	}
   171  	// Disable database in case node is still in the initial state sync stage.
   172  	if rawdb.ReadSnapSyncStatusFlag(diskdb) == rawdb.StateSyncRunning && !db.readOnly {
   173  		if err := db.Disable(); err != nil {
   174  			log.Crit("Failed to disable database", "err", err) // impossible to happen
   175  		}
   176  	}
   177  	return db
   178  }
   179  
   180  // repairHistory truncates leftover state history objects, which may occur due
   181  // to an unclean shutdown or other unexpected reasons.
   182  func (db *Database) repairHistory() error {
   183  	// Open the freezer for state history. This mechanism ensures that
   184  	// only one database instance can be opened at a time to prevent
   185  	// accidental mutation.
   186  	ancient, err := db.diskdb.AncientDatadir()
   187  	if err != nil {
   188  		// TODO error out if ancient store is disabled. A tons of unit tests
   189  		// disable the ancient store thus the error here will immediately fail
   190  		// all of them. Fix the tests first.
   191  		return nil
   192  	}
   193  	freezer, err := rawdb.NewStateFreezer(ancient, false)
   194  	if err != nil {
   195  		log.Crit("Failed to open state history freezer", "err", err)
   196  	}
   197  	db.freezer = freezer
   198  
   199  	// Reset the entire state histories if the trie database is not initialized
   200  	// yet. This action is necessary because these state histories are not
   201  	// expected to exist without an initialized trie database.
   202  	id := db.tree.bottom().stateID()
   203  	if id == 0 {
   204  		frozen, err := db.freezer.Ancients()
   205  		if err != nil {
   206  			log.Crit("Failed to retrieve head of state history", "err", err)
   207  		}
   208  		if frozen != 0 {
   209  			err := db.freezer.Reset()
   210  			if err != nil {
   211  				log.Crit("Failed to reset state histories", "err", err)
   212  			}
   213  			log.Info("Truncated extraneous state history")
   214  		}
   215  		return nil
   216  	}
   217  	// Truncate the extra state histories above in freezer in case it's not
   218  	// aligned with the disk layer. It might happen after a unclean shutdown.
   219  	pruned, err := truncateFromHead(db.diskdb, db.freezer, id)
   220  	if err != nil {
   221  		log.Crit("Failed to truncate extra state histories", "err", err)
   222  	}
   223  	if pruned != 0 {
   224  		log.Warn("Truncated extra state histories", "number", pruned)
   225  	}
   226  	return nil
   227  }
   228  
   229  // Update adds a new layer into the tree, if that can be linked to an existing
   230  // old parent. It is disallowed to insert a disk layer (the origin of all). Apart
   231  // from that this function will flatten the extra diff layers at bottom into disk
   232  // to only keep 128 diff layers in memory by default.
   233  //
   234  // The passed in maps(nodes, states) will be retained to avoid copying everything.
   235  // Therefore, these maps must not be changed afterwards.
   236  func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error {
   237  	// Hold the lock to prevent concurrent mutations.
   238  	db.lock.Lock()
   239  	defer db.lock.Unlock()
   240  
   241  	// Short circuit if the mutation is not allowed.
   242  	if err := db.modifyAllowed(); err != nil {
   243  		return err
   244  	}
   245  	if err := db.tree.add(root, parentRoot, block, nodes, states); err != nil {
   246  		return err
   247  	}
   248  	// Keep 128 diff layers in the memory, persistent layer is 129th.
   249  	// - head layer is paired with HEAD state
   250  	// - head-1 layer is paired with HEAD-1 state
   251  	// - head-127 layer(bottom-most diff layer) is paired with HEAD-127 state
   252  	// - head-128 layer(disk layer) is paired with HEAD-128 state
   253  	return db.tree.cap(root, maxDiffLayers)
   254  }
   255  
   256  // Commit traverses downwards the layer tree from a specified layer with the
   257  // provided state root and all the layers below are flattened downwards. It
   258  // can be used alone and mostly for test purposes.
   259  func (db *Database) Commit(root common.Hash, report bool) error {
   260  	// Hold the lock to prevent concurrent mutations.
   261  	db.lock.Lock()
   262  	defer db.lock.Unlock()
   263  
   264  	// Short circuit if the mutation is not allowed.
   265  	if err := db.modifyAllowed(); err != nil {
   266  		return err
   267  	}
   268  	return db.tree.cap(root, 0)
   269  }
   270  
   271  // Disable deactivates the database and invalidates all available state layers
   272  // as stale to prevent access to the persistent state, which is in the syncing
   273  // stage.
   274  func (db *Database) Disable() error {
   275  	db.lock.Lock()
   276  	defer db.lock.Unlock()
   277  
   278  	// Short circuit if the database is in read only mode.
   279  	if db.readOnly {
   280  		return errDatabaseReadOnly
   281  	}
   282  	// Prevent duplicated disable operation.
   283  	if db.waitSync {
   284  		log.Error("Reject duplicated disable operation")
   285  		return nil
   286  	}
   287  	db.waitSync = true
   288  
   289  	// Mark the disk layer as stale to prevent access to persistent state.
   290  	db.tree.bottom().markStale()
   291  
   292  	// Write the initial sync flag to persist it across restarts.
   293  	rawdb.WriteSnapSyncStatusFlag(db.diskdb, rawdb.StateSyncRunning)
   294  	log.Info("Disabled trie database due to state sync")
   295  	return nil
   296  }
   297  
   298  // Enable activates database and resets the state tree with the provided persistent
   299  // state root once the state sync is finished.
   300  func (db *Database) Enable(root common.Hash) error {
   301  	db.lock.Lock()
   302  	defer db.lock.Unlock()
   303  
   304  	// Short circuit if the database is in read only mode.
   305  	if db.readOnly {
   306  		return errDatabaseReadOnly
   307  	}
   308  	// Ensure the provided state root matches the stored one.
   309  	root = types.TrieRootHash(root)
   310  	stored := types.EmptyRootHash
   311  	if blob := rawdb.ReadAccountTrieNode(db.diskdb, nil); len(blob) > 0 {
   312  		stored = crypto.Keccak256Hash(blob)
   313  	}
   314  	if stored != root {
   315  		return fmt.Errorf("state root mismatch: stored %x, synced %x", stored, root)
   316  	}
   317  	// Drop the stale state journal in persistent database and
   318  	// reset the persistent state id back to zero.
   319  	batch := db.diskdb.NewBatch()
   320  	rawdb.DeleteTrieJournal(batch)
   321  	rawdb.WritePersistentStateID(batch, 0)
   322  	if err := batch.Write(); err != nil {
   323  		return err
   324  	}
   325  	// Clean up all state histories in freezer. Theoretically
   326  	// all root->id mappings should be removed as well. Since
   327  	// mappings can be huge and might take a while to clear
   328  	// them, just leave them in disk and wait for overwriting.
   329  	if db.freezer != nil {
   330  		if err := db.freezer.Reset(); err != nil {
   331  			return err
   332  		}
   333  	}
   334  	// Re-construct a new disk layer backed by persistent state
   335  	// with **empty clean cache and node buffer**.
   336  	db.tree.reset(newDiskLayer(root, 0, db, nil, newNodeBuffer(db.bufferSize, nil, 0)))
   337  
   338  	// Re-enable the database as the final step.
   339  	db.waitSync = false
   340  	rawdb.WriteSnapSyncStatusFlag(db.diskdb, rawdb.StateSyncFinished)
   341  	log.Info("Rebuilt trie database", "root", root)
   342  	return nil
   343  }
   344  
   345  // Recover rollbacks the database to a specified historical point.
   346  // The state is supported as the rollback destination only if it's
   347  // canonical state and the corresponding trie histories are existent.
   348  func (db *Database) Recover(root common.Hash, loader triestate.TrieLoader) error {
   349  	db.lock.Lock()
   350  	defer db.lock.Unlock()
   351  
   352  	// Short circuit if rollback operation is not supported.
   353  	if err := db.modifyAllowed(); err != nil {
   354  		return err
   355  	}
   356  	if db.freezer == nil {
   357  		return errors.New("state rollback is non-supported")
   358  	}
   359  	// Short circuit if the target state is not recoverable.
   360  	root = types.TrieRootHash(root)
   361  	if !db.Recoverable(root) {
   362  		return errStateUnrecoverable
   363  	}
   364  	// Apply the state histories upon the disk layer in order.
   365  	var (
   366  		start = time.Now()
   367  		dl    = db.tree.bottom()
   368  	)
   369  	for dl.rootHash() != root {
   370  		h, err := readHistory(db.freezer, dl.stateID())
   371  		if err != nil {
   372  			return err
   373  		}
   374  		dl, err = dl.revert(h, loader)
   375  		if err != nil {
   376  			return err
   377  		}
   378  		// reset layer with newly created disk layer. It must be
   379  		// done after each revert operation, otherwise the new
   380  		// disk layer won't be accessible from outside.
   381  		db.tree.reset(dl)
   382  	}
   383  	rawdb.DeleteTrieJournal(db.diskdb)
   384  	_, err := truncateFromHead(db.diskdb, db.freezer, dl.stateID())
   385  	if err != nil {
   386  		return err
   387  	}
   388  	log.Debug("Recovered state", "root", root, "elapsed", common.PrettyDuration(time.Since(start)))
   389  	return nil
   390  }
   391  
   392  // Recoverable returns the indicator if the specified state is recoverable.
   393  func (db *Database) Recoverable(root common.Hash) bool {
   394  	// Ensure the requested state is a known state.
   395  	root = types.TrieRootHash(root)
   396  	id := rawdb.ReadStateID(db.diskdb, root)
   397  	if id == nil {
   398  		return false
   399  	}
   400  	// Recoverable state must below the disk layer. The recoverable
   401  	// state only refers the state that is currently not available,
   402  	// but can be restored by applying state history.
   403  	dl := db.tree.bottom()
   404  	if *id >= dl.stateID() {
   405  		return false
   406  	}
   407  	// This is a temporary workaround for the unavailability of the freezer in
   408  	// dev mode. As a consequence, the Pathdb loses the ability for deep reorg
   409  	// in certain cases.
   410  	// TODO(rjl493456442): Implement the in-memory ancient store.
   411  	if db.freezer == nil {
   412  		return false
   413  	}
   414  	// Ensure the requested state is a canonical state and all state
   415  	// histories in range [id+1, disklayer.ID] are present and complete.
   416  	return checkHistories(db.freezer, *id+1, dl.stateID()-*id, func(m *meta) error {
   417  		if m.parent != root {
   418  			return errors.New("unexpected state history")
   419  		}
   420  		root = m.root
   421  		return nil
   422  	}) == nil
   423  }
   424  
   425  // Close closes the trie database and the held freezer.
   426  func (db *Database) Close() error {
   427  	db.lock.Lock()
   428  	defer db.lock.Unlock()
   429  
   430  	// Set the database to read-only mode to prevent all
   431  	// following mutations.
   432  	db.readOnly = true
   433  
   434  	// Release the memory held by clean cache.
   435  	db.tree.bottom().resetCache()
   436  
   437  	// Close the attached state history freezer.
   438  	if db.freezer == nil {
   439  		return nil
   440  	}
   441  	return db.freezer.Close()
   442  }
   443  
   444  // Size returns the current storage size of the memory cache in front of the
   445  // persistent database layer.
   446  func (db *Database) Size() (diffs common.StorageSize, nodes common.StorageSize) {
   447  	db.tree.forEach(func(layer layer) {
   448  		if diff, ok := layer.(*diffLayer); ok {
   449  			diffs += common.StorageSize(diff.memory)
   450  		}
   451  		if disk, ok := layer.(*diskLayer); ok {
   452  			nodes += disk.size()
   453  		}
   454  	})
   455  	return diffs, nodes
   456  }
   457  
   458  // Initialized returns an indicator if the state data is already
   459  // initialized in path-based scheme.
   460  func (db *Database) Initialized(genesisRoot common.Hash) bool {
   461  	var inited bool
   462  	db.tree.forEach(func(layer layer) {
   463  		if layer.rootHash() != types.EmptyRootHash {
   464  			inited = true
   465  		}
   466  	})
   467  	if !inited {
   468  		inited = rawdb.ReadSnapSyncStatusFlag(db.diskdb) != rawdb.StateSyncUnknown
   469  	}
   470  	return inited
   471  }
   472  
   473  // SetBufferSize sets the node buffer size to the provided value(in bytes).
   474  func (db *Database) SetBufferSize(size int) error {
   475  	db.lock.Lock()
   476  	defer db.lock.Unlock()
   477  
   478  	if size > maxBufferSize {
   479  		log.Info("Capped node buffer size", "provided", common.StorageSize(size), "adjusted", common.StorageSize(maxBufferSize))
   480  		size = maxBufferSize
   481  	}
   482  	db.bufferSize = size
   483  	return db.tree.bottom().setBufferSize(db.bufferSize)
   484  }
   485  
   486  // modifyAllowed returns the indicator if mutation is allowed. This function
   487  // assumes the db.lock is already held.
   488  func (db *Database) modifyAllowed() error {
   489  	if db.readOnly {
   490  		return errDatabaseReadOnly
   491  	}
   492  	if db.waitSync {
   493  		return errDatabaseWaitSync
   494  	}
   495  	return nil
   496  }
   497  
   498  // AccountHistory inspects the account history within the specified range.
   499  //
   500  // Start: State ID of the first history object for the query. 0 implies the first
   501  // available object is selected as the starting point.
   502  //
   503  // End: State ID of the last history for the query. 0 implies the last available
   504  // object is selected as the ending point. Note end is included in the query.
   505  func (db *Database) AccountHistory(address common.Address, start, end uint64) (*HistoryStats, error) {
   506  	return accountHistory(db.freezer, address, start, end)
   507  }
   508  
   509  // StorageHistory inspects the storage history within the specified range.
   510  //
   511  // Start: State ID of the first history object for the query. 0 implies the first
   512  // available object is selected as the starting point.
   513  //
   514  // End: State ID of the last history for the query. 0 implies the last available
   515  // object is selected as the ending point. Note end is included in the query.
   516  //
   517  // Note, slot refers to the hash of the raw slot key.
   518  func (db *Database) StorageHistory(address common.Address, slot common.Hash, start uint64, end uint64) (*HistoryStats, error) {
   519  	return storageHistory(db.freezer, address, slot, start, end)
   520  }
   521  
   522  // HistoryRange returns the block numbers associated with earliest and latest
   523  // state history in the local store.
   524  func (db *Database) HistoryRange() (uint64, uint64, error) {
   525  	return historyRange(db.freezer)
   526  }