github.com/ethereum/go-ethereum@v1.16.1/triedb/pathdb/states.go (about)

     1  // Copyright 2024 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package pathdb
    18  
    19  import (
    20  	"fmt"
    21  	"io"
    22  	"maps"
    23  	"slices"
    24  	"sync"
    25  
    26  	"github.com/VictoriaMetrics/fastcache"
    27  	"github.com/ethereum/go-ethereum/common"
    28  	"github.com/ethereum/go-ethereum/core/rawdb"
    29  	"github.com/ethereum/go-ethereum/ethdb"
    30  	"github.com/ethereum/go-ethereum/log"
    31  	"github.com/ethereum/go-ethereum/metrics"
    32  	"github.com/ethereum/go-ethereum/rlp"
    33  )
    34  
    35  // counter helps in tracking items and their corresponding sizes.
    36  type counter struct {
    37  	n    int
    38  	size int
    39  }
    40  
    41  // add size to the counter and increase the item counter.
    42  func (c *counter) add(size int) {
    43  	c.n++
    44  	c.size += size
    45  }
    46  
    47  // report uploads the cached statistics to meters.
    48  func (c *counter) report(count, size *metrics.Meter) {
    49  	count.Mark(int64(c.n))
    50  	size.Mark(int64(c.size))
    51  }
    52  
    53  // stateSet represents a collection of state modifications associated with a
    54  // transition (e.g., a block execution) or multiple aggregated transitions.
    55  //
    56  // A stateSet can only reside within a diffLayer or the buffer of a diskLayer,
    57  // serving as the envelope for the set. Lock protection is not required for
    58  // accessing or mutating the account set and storage set, as the associated
    59  // envelope is always marked as stale before any mutation is applied. Any
    60  // subsequent state access will be denied due to the stale flag. Therefore,
    61  // state access and mutation won't happen at the same time with guarantee.
    62  type stateSet struct {
    63  	accountData map[common.Hash][]byte                 // Keyed accounts for direct retrieval (nil means deleted)
    64  	storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrieval. one per account (nil means deleted)
    65  	size        uint64                                 // Memory size of the state data (accountData and storageData)
    66  
    67  	accountListSorted []common.Hash                 // List of account for iteration. If it exists, it's sorted, otherwise it's nil
    68  	storageListSorted map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account. Any existing lists are sorted if non-nil
    69  
    70  	rawStorageKey bool // indicates whether the storage set uses the raw slot key or the hash
    71  
    72  	// Lock for guarding the two lists above. These lists might be accessed
    73  	// concurrently and lock protection is essential to avoid concurrent
    74  	// slice or map read/write.
    75  	listLock sync.RWMutex
    76  }
    77  
    78  // newStates constructs the state set with the provided account and storage data.
    79  func newStates(accounts map[common.Hash][]byte, storages map[common.Hash]map[common.Hash][]byte, rawStorageKey bool) *stateSet {
    80  	// Don't panic for the lazy callers, initialize the nil maps instead.
    81  	if accounts == nil {
    82  		accounts = make(map[common.Hash][]byte)
    83  	}
    84  	if storages == nil {
    85  		storages = make(map[common.Hash]map[common.Hash][]byte)
    86  	}
    87  	s := &stateSet{
    88  		accountData:       accounts,
    89  		storageData:       storages,
    90  		rawStorageKey:     rawStorageKey,
    91  		storageListSorted: make(map[common.Hash][]common.Hash),
    92  	}
    93  	s.size = s.check()
    94  	return s
    95  }
    96  
    97  // account returns the account data associated with the specified address hash.
    98  func (s *stateSet) account(hash common.Hash) ([]byte, bool) {
    99  	// If the account is known locally, return it
   100  	if data, ok := s.accountData[hash]; ok {
   101  		return data, true
   102  	}
   103  	return nil, false // account is unknown in this set
   104  }
   105  
   106  // mustAccount returns the account data associated with the specified address
   107  // hash. The difference is this function will return an error if the account
   108  // is not found.
   109  func (s *stateSet) mustAccount(hash common.Hash) ([]byte, error) {
   110  	// If the account is known locally, return it
   111  	if data, ok := s.accountData[hash]; ok {
   112  		return data, nil
   113  	}
   114  	return nil, fmt.Errorf("account is not found, %x", hash)
   115  }
   116  
   117  // storage returns the storage slot associated with the specified address hash
   118  // and storage key hash.
   119  func (s *stateSet) storage(accountHash, storageHash common.Hash) ([]byte, bool) {
   120  	// If the account is known locally, try to resolve the slot locally
   121  	if storage, ok := s.storageData[accountHash]; ok {
   122  		if data, ok := storage[storageHash]; ok {
   123  			return data, true
   124  		}
   125  	}
   126  	return nil, false // storage is unknown in this set
   127  }
   128  
   129  // mustStorage returns the storage slot associated with the specified address
   130  // hash and storage key hash. The difference is this function will return an
   131  // error if the storage slot is not found.
   132  func (s *stateSet) mustStorage(accountHash, storageHash common.Hash) ([]byte, error) {
   133  	// If the account is known locally, try to resolve the slot locally
   134  	if storage, ok := s.storageData[accountHash]; ok {
   135  		if data, ok := storage[storageHash]; ok {
   136  			return data, nil
   137  		}
   138  	}
   139  	return nil, fmt.Errorf("storage slot is not found, %x %x", accountHash, storageHash)
   140  }
   141  
   142  // check sanitizes accounts and storage slots to ensure the data validity.
   143  // Additionally, it computes the total memory size occupied by the maps.
   144  func (s *stateSet) check() uint64 {
   145  	var size int
   146  	for _, blob := range s.accountData {
   147  		size += common.HashLength + len(blob)
   148  	}
   149  	for accountHash, slots := range s.storageData {
   150  		if slots == nil {
   151  			panic(fmt.Sprintf("storage %#x nil", accountHash)) // nil slots is not permitted
   152  		}
   153  		for _, blob := range slots {
   154  			size += 2*common.HashLength + len(blob)
   155  		}
   156  	}
   157  	return uint64(size)
   158  }
   159  
   160  // accountList returns a sorted list of all accounts in this state set, including
   161  // the deleted ones.
   162  //
   163  // Note, the returned slice is not a copy, so do not modify it.
   164  func (s *stateSet) accountList() []common.Hash {
   165  	// If an old list already exists, return it
   166  	s.listLock.RLock()
   167  	list := s.accountListSorted
   168  	s.listLock.RUnlock()
   169  
   170  	if list != nil {
   171  		return list
   172  	}
   173  	// No old sorted account list exists, generate a new one. It's possible that
   174  	// multiple threads waiting for the write lock may regenerate the list
   175  	// multiple times, which is acceptable.
   176  	s.listLock.Lock()
   177  	defer s.listLock.Unlock()
   178  
   179  	list = slices.SortedFunc(maps.Keys(s.accountData), common.Hash.Cmp)
   180  	s.accountListSorted = list
   181  	return list
   182  }
   183  
   184  // StorageList returns a sorted list of all storage slot hashes in this state set
   185  // for the given account. The returned list will include the hash of deleted
   186  // storage slot.
   187  //
   188  // Note, the returned slice is not a copy, so do not modify it.
   189  func (s *stateSet) storageList(accountHash common.Hash) []common.Hash {
   190  	s.listLock.RLock()
   191  	if _, ok := s.storageData[accountHash]; !ok {
   192  		// Account not tracked by this layer
   193  		s.listLock.RUnlock()
   194  		return nil
   195  	}
   196  	// If an old list already exists, return it
   197  	if list, exist := s.storageListSorted[accountHash]; exist {
   198  		s.listLock.RUnlock()
   199  		return list // the cached list can't be nil
   200  	}
   201  	s.listLock.RUnlock()
   202  
   203  	// No old sorted account list exists, generate a new one. It's possible that
   204  	// multiple threads waiting for the write lock may regenerate the list
   205  	// multiple times, which is acceptable.
   206  	s.listLock.Lock()
   207  	defer s.listLock.Unlock()
   208  
   209  	list := slices.SortedFunc(maps.Keys(s.storageData[accountHash]), common.Hash.Cmp)
   210  	s.storageListSorted[accountHash] = list
   211  	return list
   212  }
   213  
   214  // clearLists invalidates the cached account list and storage lists.
   215  func (s *stateSet) clearLists() {
   216  	s.listLock.Lock()
   217  	defer s.listLock.Unlock()
   218  
   219  	s.accountListSorted = nil
   220  	s.storageListSorted = make(map[common.Hash][]common.Hash)
   221  }
   222  
   223  // merge integrates the accounts and storages from the external set into the
   224  // local set, ensuring the combined set reflects the combined state of both.
   225  //
   226  // The stateSet supplied as parameter set will not be mutated by this operation,
   227  // as it may still be referenced by other layers.
   228  func (s *stateSet) merge(other *stateSet) {
   229  	var (
   230  		delta             int
   231  		accountOverwrites counter
   232  		storageOverwrites counter
   233  	)
   234  	// Apply the updated account data
   235  	for accountHash, data := range other.accountData {
   236  		if origin, ok := s.accountData[accountHash]; ok {
   237  			delta += len(data) - len(origin)
   238  			accountOverwrites.add(common.HashLength + len(origin))
   239  		} else {
   240  			delta += common.HashLength + len(data)
   241  		}
   242  		s.accountData[accountHash] = data
   243  	}
   244  	// Apply all the updated storage slots (individually)
   245  	for accountHash, storage := range other.storageData {
   246  		// If storage didn't exist in the set, overwrite blindly
   247  		if _, ok := s.storageData[accountHash]; !ok {
   248  			// To prevent potential concurrent map read/write issues, allocate a
   249  			// new map for the storage instead of claiming it directly from the
   250  			// passed external set. Even after merging, the slots belonging to the
   251  			// external state set remain accessible, so ownership of the map should
   252  			// not be taken, and any mutation on it should be avoided.
   253  			slots := make(map[common.Hash][]byte, len(storage))
   254  			for storageHash, data := range storage {
   255  				slots[storageHash] = data
   256  				delta += 2*common.HashLength + len(data)
   257  			}
   258  			s.storageData[accountHash] = slots
   259  			continue
   260  		}
   261  		// Storage exists in both local and external set, merge the slots
   262  		slots := s.storageData[accountHash]
   263  		for storageHash, data := range storage {
   264  			if origin, ok := slots[storageHash]; ok {
   265  				delta += len(data) - len(origin)
   266  				storageOverwrites.add(2*common.HashLength + len(origin))
   267  			} else {
   268  				delta += 2*common.HashLength + len(data)
   269  			}
   270  			slots[storageHash] = data
   271  		}
   272  	}
   273  	accountOverwrites.report(gcAccountMeter, gcAccountBytesMeter)
   274  	storageOverwrites.report(gcStorageMeter, gcStorageBytesMeter)
   275  	s.clearLists()
   276  	s.updateSize(delta)
   277  }
   278  
   279  // revertTo takes the original value of accounts and storages as input and reverts
   280  // the latest state transition applied on the state set.
   281  //
   282  // Notably, this operation may result in the set containing more entries after a
   283  // revert. For example, if account x did not exist and was created during transition
   284  // w, reverting w will retain an x=nil entry in the set. And also if account x along
   285  // with its storage slots was deleted in the transition w, reverting w will retain
   286  // a list of additional storage slots with their original value.
   287  func (s *stateSet) revertTo(accountOrigin map[common.Hash][]byte, storageOrigin map[common.Hash]map[common.Hash][]byte) {
   288  	var delta int // size tracking
   289  	for addrHash, blob := range accountOrigin {
   290  		data, ok := s.accountData[addrHash]
   291  		if !ok {
   292  			panic(fmt.Sprintf("non-existent account for reverting, %x", addrHash))
   293  		}
   294  		if len(data) == 0 && len(blob) == 0 {
   295  			panic(fmt.Sprintf("invalid account mutation (null to null), %x", addrHash))
   296  		}
   297  		delta += len(blob) - len(data)
   298  		s.accountData[addrHash] = blob
   299  	}
   300  	// Overwrite the storage data with original value blindly
   301  	for addrHash, storage := range storageOrigin {
   302  		slots := s.storageData[addrHash]
   303  		if len(slots) == 0 {
   304  			panic(fmt.Sprintf("non-existent storage set for reverting, %x", addrHash))
   305  		}
   306  		for storageHash, blob := range storage {
   307  			data, ok := slots[storageHash]
   308  			if !ok {
   309  				panic(fmt.Sprintf("non-existent storage slot for reverting, %x-%x", addrHash, storageHash))
   310  			}
   311  			if len(blob) == 0 && len(data) == 0 {
   312  				panic(fmt.Sprintf("invalid storage slot mutation (null to null), %x-%x", addrHash, storageHash))
   313  			}
   314  			delta += len(blob) - len(data)
   315  			slots[storageHash] = blob
   316  		}
   317  	}
   318  	s.clearLists()
   319  	s.updateSize(delta)
   320  }
   321  
   322  // updateSize updates the total cache size by the given delta.
   323  func (s *stateSet) updateSize(delta int) {
   324  	size := int64(s.size) + int64(delta)
   325  	if size >= 0 {
   326  		s.size = uint64(size)
   327  		return
   328  	}
   329  	log.Error("Stateset size underflow", "prev", common.StorageSize(s.size), "delta", common.StorageSize(delta))
   330  	s.size = 0
   331  }
   332  
   333  // encode serializes the content of state set into the provided writer.
   334  func (s *stateSet) encode(w io.Writer) error {
   335  	// Encode accounts
   336  	if err := rlp.Encode(w, s.rawStorageKey); err != nil {
   337  		return err
   338  	}
   339  	type accounts struct {
   340  		AddrHashes []common.Hash
   341  		Accounts   [][]byte
   342  	}
   343  	var enc accounts
   344  	for addrHash, blob := range s.accountData {
   345  		enc.AddrHashes = append(enc.AddrHashes, addrHash)
   346  		enc.Accounts = append(enc.Accounts, blob)
   347  	}
   348  	if err := rlp.Encode(w, enc); err != nil {
   349  		return err
   350  	}
   351  	// Encode storages
   352  	type Storage struct {
   353  		AddrHash common.Hash
   354  		Keys     []common.Hash
   355  		Vals     [][]byte
   356  	}
   357  	storages := make([]Storage, 0, len(s.storageData))
   358  	for addrHash, slots := range s.storageData {
   359  		keys := make([]common.Hash, 0, len(slots))
   360  		vals := make([][]byte, 0, len(slots))
   361  		for key, val := range slots {
   362  			keys = append(keys, key)
   363  			vals = append(vals, val)
   364  		}
   365  		storages = append(storages, Storage{
   366  			AddrHash: addrHash,
   367  			Keys:     keys,
   368  			Vals:     vals,
   369  		})
   370  	}
   371  	return rlp.Encode(w, storages)
   372  }
   373  
   374  // decode deserializes the content from the rlp stream into the state set.
   375  func (s *stateSet) decode(r *rlp.Stream) error {
   376  	if err := r.Decode(&s.rawStorageKey); err != nil {
   377  		return fmt.Errorf("load diff raw storage key flag: %v", err)
   378  	}
   379  	type accounts struct {
   380  		AddrHashes []common.Hash
   381  		Accounts   [][]byte
   382  	}
   383  	var (
   384  		dec        accounts
   385  		accountSet = make(map[common.Hash][]byte)
   386  	)
   387  	if err := r.Decode(&dec); err != nil {
   388  		return fmt.Errorf("load diff accounts: %v", err)
   389  	}
   390  	for i := range dec.AddrHashes {
   391  		accountSet[dec.AddrHashes[i]] = empty2nil(dec.Accounts[i])
   392  	}
   393  	s.accountData = accountSet
   394  
   395  	// Decode storages
   396  	type storage struct {
   397  		AddrHash common.Hash
   398  		Keys     []common.Hash
   399  		Vals     [][]byte
   400  	}
   401  	var (
   402  		storages   []storage
   403  		storageSet = make(map[common.Hash]map[common.Hash][]byte)
   404  	)
   405  	if err := r.Decode(&storages); err != nil {
   406  		return fmt.Errorf("load diff storage: %v", err)
   407  	}
   408  	for _, entry := range storages {
   409  		storageSet[entry.AddrHash] = make(map[common.Hash][]byte, len(entry.Keys))
   410  		for i := range entry.Keys {
   411  			storageSet[entry.AddrHash][entry.Keys[i]] = empty2nil(entry.Vals[i])
   412  		}
   413  	}
   414  	s.storageData = storageSet
   415  	s.storageListSorted = make(map[common.Hash][]common.Hash)
   416  
   417  	s.size = s.check()
   418  	return nil
   419  }
   420  
   421  // write flushes state mutations into the provided database batch as a whole.
   422  func (s *stateSet) write(batch ethdb.Batch, genMarker []byte, clean *fastcache.Cache) (int, int) {
   423  	return writeStates(batch, genMarker, s.accountData, s.storageData, clean)
   424  }
   425  
   426  // reset clears all cached state data, including any optional sorted lists that
   427  // may have been generated.
   428  func (s *stateSet) reset() {
   429  	s.accountData = make(map[common.Hash][]byte)
   430  	s.storageData = make(map[common.Hash]map[common.Hash][]byte)
   431  	s.size = 0
   432  	s.accountListSorted = nil
   433  	s.storageListSorted = make(map[common.Hash][]common.Hash)
   434  }
   435  
   436  // dbsize returns the approximate size for db write.
   437  func (s *stateSet) dbsize() int {
   438  	m := len(s.accountData) * len(rawdb.SnapshotAccountPrefix)
   439  	for _, slots := range s.storageData {
   440  		m += len(slots) * len(rawdb.SnapshotStoragePrefix)
   441  	}
   442  	return m + int(s.size)
   443  }
   444  
   445  // StateSetWithOrigin wraps the state set with additional original values of the
   446  // mutated states.
   447  type StateSetWithOrigin struct {
   448  	*stateSet
   449  
   450  	// accountOrigin represents the account data before the state transition,
   451  	// corresponding to both the accountData and destructSet. It's keyed by the
   452  	// account address. The nil value means the account was not present before.
   453  	accountOrigin map[common.Address][]byte
   454  
   455  	// storageOrigin represents the storage data before the state transition,
   456  	// corresponding to storageData and deleted slots of destructSet. It's keyed
   457  	// by the account address and slot key hash. The nil value means the slot was
   458  	// not present.
   459  	storageOrigin map[common.Address]map[common.Hash][]byte
   460  
   461  	// memory size of the state data (accountOrigin and storageOrigin)
   462  	size uint64
   463  }
   464  
   465  // NewStateSetWithOrigin constructs the state set with the provided data.
   466  func NewStateSetWithOrigin(accounts map[common.Hash][]byte, storages map[common.Hash]map[common.Hash][]byte, accountOrigin map[common.Address][]byte, storageOrigin map[common.Address]map[common.Hash][]byte, rawStorageKey bool) *StateSetWithOrigin {
   467  	// Don't panic for the lazy callers, initialize the nil maps instead.
   468  	if accountOrigin == nil {
   469  		accountOrigin = make(map[common.Address][]byte)
   470  	}
   471  	if storageOrigin == nil {
   472  		storageOrigin = make(map[common.Address]map[common.Hash][]byte)
   473  	}
   474  	// Count the memory size occupied by the set. Note that each slot key here
   475  	// uses 2*common.HashLength to keep consistent with the calculation method
   476  	// of stateSet.
   477  	var size int
   478  	for _, data := range accountOrigin {
   479  		size += common.HashLength + len(data)
   480  	}
   481  	for _, slots := range storageOrigin {
   482  		for _, data := range slots {
   483  			size += 2*common.HashLength + len(data)
   484  		}
   485  	}
   486  	set := newStates(accounts, storages, rawStorageKey)
   487  	return &StateSetWithOrigin{
   488  		stateSet:      set,
   489  		accountOrigin: accountOrigin,
   490  		storageOrigin: storageOrigin,
   491  		size:          set.size + uint64(size),
   492  	}
   493  }
   494  
   495  // encode serializes the content of state set into the provided writer.
   496  func (s *StateSetWithOrigin) encode(w io.Writer) error {
   497  	// Encode state set
   498  	if err := s.stateSet.encode(w); err != nil {
   499  		return err
   500  	}
   501  	// Encode accounts
   502  	type Accounts struct {
   503  		Addresses []common.Address
   504  		Accounts  [][]byte
   505  	}
   506  	var accounts Accounts
   507  	for address, blob := range s.accountOrigin {
   508  		accounts.Addresses = append(accounts.Addresses, address)
   509  		accounts.Accounts = append(accounts.Accounts, blob)
   510  	}
   511  	if err := rlp.Encode(w, accounts); err != nil {
   512  		return err
   513  	}
   514  	// Encode storages
   515  	type Storage struct {
   516  		Address common.Address
   517  		Keys    []common.Hash
   518  		Vals    [][]byte
   519  	}
   520  	storages := make([]Storage, 0, len(s.storageOrigin))
   521  	for address, slots := range s.storageOrigin {
   522  		keys := make([]common.Hash, 0, len(slots))
   523  		vals := make([][]byte, 0, len(slots))
   524  		for key, val := range slots {
   525  			keys = append(keys, key)
   526  			vals = append(vals, val)
   527  		}
   528  		storages = append(storages, Storage{Address: address, Keys: keys, Vals: vals})
   529  	}
   530  	return rlp.Encode(w, storages)
   531  }
   532  
   533  // decode deserializes the content from the rlp stream into the state set.
   534  func (s *StateSetWithOrigin) decode(r *rlp.Stream) error {
   535  	if s.stateSet == nil {
   536  		s.stateSet = &stateSet{}
   537  	}
   538  	if err := s.stateSet.decode(r); err != nil {
   539  		return err
   540  	}
   541  	// Decode account origin
   542  	type Accounts struct {
   543  		Addresses []common.Address
   544  		Accounts  [][]byte
   545  	}
   546  	var (
   547  		accounts   Accounts
   548  		accountSet = make(map[common.Address][]byte)
   549  	)
   550  	if err := r.Decode(&accounts); err != nil {
   551  		return fmt.Errorf("load diff account origin set: %v", err)
   552  	}
   553  	for i := range accounts.Accounts {
   554  		accountSet[accounts.Addresses[i]] = empty2nil(accounts.Accounts[i])
   555  	}
   556  	s.accountOrigin = accountSet
   557  
   558  	// Decode storage origin
   559  	type Storage struct {
   560  		Address common.Address
   561  		Keys    []common.Hash
   562  		Vals    [][]byte
   563  	}
   564  	var (
   565  		storages   []Storage
   566  		storageSet = make(map[common.Address]map[common.Hash][]byte)
   567  	)
   568  	if err := r.Decode(&storages); err != nil {
   569  		return fmt.Errorf("load diff storage origin: %v", err)
   570  	}
   571  	for _, storage := range storages {
   572  		storageSet[storage.Address] = make(map[common.Hash][]byte)
   573  		for i := range storage.Keys {
   574  			storageSet[storage.Address][storage.Keys[i]] = empty2nil(storage.Vals[i])
   575  		}
   576  	}
   577  	s.storageOrigin = storageSet
   578  	return nil
   579  }
   580  
   581  func empty2nil(b []byte) []byte {
   582  	if len(b) == 0 {
   583  		return nil
   584  	}
   585  	return b
   586  }