github.com/ethereum/go-ethereum@v1.16.1/triedb/pathdb/history_reader.go (about)

     1  // Copyright 2025 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package pathdb
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/binary"
    22  	"errors"
    23  	"fmt"
    24  	"math"
    25  	"sort"
    26  
    27  	"github.com/ethereum/go-ethereum/common"
    28  	"github.com/ethereum/go-ethereum/core/rawdb"
    29  	"github.com/ethereum/go-ethereum/ethdb"
    30  )
    31  
// stateIdent represents the identifier of a state element, which can be
// either an account or a storage slot.
type stateIdent struct {
	// account is true when the identifier refers to an account and false
	// when it refers to a storage slot.
	account bool

	// The hash of the account address. It is used instead of the raw account
	// address in order to align the traversal order with the
	// Merkle-Patricia-Trie.
	addressHash common.Hash

	// The hash of the storage slot key. This is used instead of the raw slot key
	// because, in legacy state histories (prior to the Cancun fork), the slot
	// identifier is the hash of the key, and the original key (preimage) cannot
	// be recovered. To maintain backward compatibility, the key hash is used.
	//
	// Meanwhile, using the storage key hash also preserves the traversal order
	// of the Merkle-Patricia-Trie.
	//
	// This field is left as the zero hash if the identifier refers to account
	// data.
	storageHash common.Hash
}
    52  
    53  // String returns the string format state identifier.
    54  func (ident stateIdent) String() string {
    55  	if ident.account {
    56  		return ident.addressHash.Hex()
    57  	}
    58  	return ident.addressHash.Hex() + ident.storageHash.Hex()
    59  }
    60  
    61  // newAccountIdent constructs a state identifier for an account.
    62  func newAccountIdent(addressHash common.Hash) stateIdent {
    63  	return stateIdent{
    64  		account:     true,
    65  		addressHash: addressHash,
    66  	}
    67  }
    68  
    69  // newStorageIdent constructs a state identifier for a storage slot.
    70  // The address denotes the address of the associated account;
    71  // the storageHash denotes the hash of the raw storage slot key;
    72  func newStorageIdent(addressHash common.Hash, storageHash common.Hash) stateIdent {
    73  	return stateIdent{
    74  		addressHash: addressHash,
    75  		storageHash: storageHash,
    76  	}
    77  }
    78  
// stateIdentQuery extends stateIdent with the raw (unhashed) account address
// and the raw storage slot key, in addition to the hashes carried by the
// embedded identifier.
type stateIdentQuery struct {
	stateIdent

	// address is the raw address of the associated account.
	address common.Address
	// storageKey is the raw storage slot key. It is left as the zero hash by
	// newAccountIdentQuery, i.e. when the query targets account data.
	storageKey common.Hash
}
    86  
    87  // newAccountIdentQuery constructs a state identifier for an account.
    88  func newAccountIdentQuery(address common.Address, addressHash common.Hash) stateIdentQuery {
    89  	return stateIdentQuery{
    90  		stateIdent: stateIdent{
    91  			account:     true,
    92  			addressHash: addressHash,
    93  		},
    94  		address: address,
    95  	}
    96  }
    97  
    98  // newStorageIdentQuery constructs a state identifier for a storage slot.
    99  // the address denotes the address of the associated account;
   100  // the addressHash denotes the address hash of the associated account;
   101  // the storageKey denotes the raw storage slot key;
   102  // the storageHash denotes the hash of the raw storage slot key;
   103  func newStorageIdentQuery(address common.Address, addressHash common.Hash, storageKey common.Hash, storageHash common.Hash) stateIdentQuery {
   104  	return stateIdentQuery{
   105  		stateIdent: stateIdent{
   106  			addressHash: addressHash,
   107  			storageHash: storageHash,
   108  		},
   109  		address:    address,
   110  		storageKey: storageKey,
   111  	}
   112  }
   113  
// indexReaderWithLimitTag is a wrapper around indexReader that includes an
// additional index position. This position represents the ID of the last
// indexed state history at the time the reader was created (or last
// refreshed), implying that indexes beyond this position are unavailable.
type indexReaderWithLimitTag struct {
	// reader is the wrapped index reader that performs the actual lookups.
	reader *indexReader
	// limit is the ID of the last indexed state history known to this reader.
	limit uint64
	// db is the key-value store from which the index metadata is reloaded
	// when the reader needs to be refreshed.
	db ethdb.KeyValueReader
}
   123  
   124  // newIndexReaderWithLimitTag constructs a index reader with indexing position.
   125  func newIndexReaderWithLimitTag(db ethdb.KeyValueReader, state stateIdent) (*indexReaderWithLimitTag, error) {
   126  	// Read the last indexed ID before the index reader construction
   127  	metadata := loadIndexMetadata(db)
   128  	if metadata == nil {
   129  		return nil, errors.New("state history hasn't been indexed yet")
   130  	}
   131  	r, err := newIndexReader(db, state)
   132  	if err != nil {
   133  		return nil, err
   134  	}
   135  	return &indexReaderWithLimitTag{
   136  		reader: r,
   137  		limit:  metadata.Last,
   138  		db:     db,
   139  	}, nil
   140  }
   141  
   142  // readGreaterThan locates the first element that is greater than the specified
   143  // id. If no such element is found, MaxUint64 is returned.
   144  //
   145  // Note: It is possible that additional histories have been indexed since the
   146  // reader was created. The reader should be refreshed as needed to load the
   147  // latest indexed data from disk.
   148  func (r *indexReaderWithLimitTag) readGreaterThan(id uint64, lastID uint64) (uint64, error) {
   149  	// Mark the index reader as stale if the tracked indexing position moves
   150  	// backward. This can occur if the pathdb is reverted and certain state
   151  	// histories are unindexed. For simplicity, the reader is marked as stale
   152  	// instead of being refreshed, as this scenario is highly unlikely.
   153  	if r.limit > lastID {
   154  		return 0, fmt.Errorf("index reader is stale, limit: %d, last-state-id: %d", r.limit, lastID)
   155  	}
   156  	// Try to find the element which is greater than the specified target
   157  	res, err := r.reader.readGreaterThan(id)
   158  	if err != nil {
   159  		return 0, err
   160  	}
   161  	// Short circuit if the element is found within the current index
   162  	if res != math.MaxUint64 {
   163  		return res, nil
   164  	}
   165  	// The element was not found, and no additional histories have been indexed.
   166  	// Return a not-found result.
   167  	if r.limit == lastID {
   168  		return res, nil
   169  	}
   170  	// Refresh the index reader and attempt again. If the latest indexed position
   171  	// is even below the ID of the disk layer, it indicates that state histories
   172  	// are being removed. In this case, it would theoretically be better to block
   173  	// the state rollback operation synchronously until all readers are released.
   174  	// Given that it's very unlikely to occur and users try to perform historical
   175  	// state queries while reverting the states at the same time. Simply returning
   176  	// an error should be sufficient for now.
   177  	metadata := loadIndexMetadata(r.db)
   178  	if metadata == nil || metadata.Last < lastID {
   179  		return 0, errors.New("state history hasn't been indexed yet")
   180  	}
   181  	if err := r.reader.refresh(); err != nil {
   182  		return 0, err
   183  	}
   184  	r.limit = metadata.Last
   185  
   186  	return r.reader.readGreaterThan(id)
   187  }
   188  
// historyReader is the structure to access historic state data.
type historyReader struct {
	// disk is the key-value store holding the index metadata and the state
	// history indexes.
	disk ethdb.KeyValueReader
	// freezer is the ancient store from which state history objects are read.
	freezer ethdb.AncientReader
	// readers caches the index readers created so far, keyed by the string
	// form of the state identifier.
	readers map[string]*indexReaderWithLimitTag
}
   195  
   196  // newHistoryReader constructs the history reader with the supplied db.
   197  func newHistoryReader(disk ethdb.KeyValueReader, freezer ethdb.AncientReader) *historyReader {
   198  	return &historyReader{
   199  		disk:    disk,
   200  		freezer: freezer,
   201  		readers: make(map[string]*indexReaderWithLimitTag),
   202  	}
   203  }
   204  
   205  // readAccountMetadata resolves the account metadata within the specified
   206  // state history.
   207  func (r *historyReader) readAccountMetadata(address common.Address, historyID uint64) ([]byte, error) {
   208  	blob := rawdb.ReadStateAccountIndex(r.freezer, historyID)
   209  	if len(blob) == 0 {
   210  		return nil, fmt.Errorf("account index is truncated, historyID: %d", historyID)
   211  	}
   212  	if len(blob)%accountIndexSize != 0 {
   213  		return nil, fmt.Errorf("account index is corrupted, historyID: %d, size: %d", historyID, len(blob))
   214  	}
   215  	n := len(blob) / accountIndexSize
   216  
   217  	pos := sort.Search(n, func(i int) bool {
   218  		h := blob[accountIndexSize*i : accountIndexSize*i+common.HashLength]
   219  		return bytes.Compare(h, address.Bytes()) >= 0
   220  	})
   221  	if pos == n {
   222  		return nil, fmt.Errorf("account %#x is not found", address)
   223  	}
   224  	offset := accountIndexSize * pos
   225  	if address != common.BytesToAddress(blob[offset:offset+common.AddressLength]) {
   226  		return nil, fmt.Errorf("account %#x is not found", address)
   227  	}
   228  	return blob[offset : accountIndexSize*(pos+1)], nil
   229  }
   230  
   231  // readStorageMetadata resolves the storage slot metadata within the specified
   232  // state history.
   233  func (r *historyReader) readStorageMetadata(storageKey common.Hash, storageHash common.Hash, historyID uint64, slotOffset, slotNumber int) ([]byte, error) {
   234  	// TODO(rj493456442) optimize it with partial read
   235  	blob := rawdb.ReadStateStorageIndex(r.freezer, historyID)
   236  	if len(blob) == 0 {
   237  		return nil, fmt.Errorf("storage index is truncated, historyID: %d", historyID)
   238  	}
   239  	if len(blob)%slotIndexSize != 0 {
   240  		return nil, fmt.Errorf("storage indices is corrupted, historyID: %d, size: %d", historyID, len(blob))
   241  	}
   242  	if slotIndexSize*(slotOffset+slotNumber) > len(blob) {
   243  		return nil, fmt.Errorf("storage indices is truncated, historyID: %d, size: %d, offset: %d, length: %d", historyID, len(blob), slotOffset, slotNumber)
   244  	}
   245  	subSlice := blob[slotIndexSize*slotOffset : slotIndexSize*(slotOffset+slotNumber)]
   246  
   247  	// TODO(rj493456442) get rid of the metadata resolution
   248  	var (
   249  		m      meta
   250  		target common.Hash
   251  	)
   252  	blob = rawdb.ReadStateHistoryMeta(r.freezer, historyID)
   253  	if err := m.decode(blob); err != nil {
   254  		return nil, err
   255  	}
   256  	if m.version == stateHistoryV0 {
   257  		target = storageHash
   258  	} else {
   259  		target = storageKey
   260  	}
   261  	pos := sort.Search(slotNumber, func(i int) bool {
   262  		slotID := subSlice[slotIndexSize*i : slotIndexSize*i+common.HashLength]
   263  		return bytes.Compare(slotID, target.Bytes()) >= 0
   264  	})
   265  	if pos == slotNumber {
   266  		return nil, fmt.Errorf("storage metadata is not found, slot key: %#x, historyID: %d", storageKey, historyID)
   267  	}
   268  	offset := slotIndexSize * pos
   269  	if target != common.BytesToHash(subSlice[offset:offset+common.HashLength]) {
   270  		return nil, fmt.Errorf("storage metadata is not found, slot key: %#x, historyID: %d", storageKey, historyID)
   271  	}
   272  	return subSlice[offset : slotIndexSize*(pos+1)], nil
   273  }
   274  
   275  // readAccount retrieves the account data from the specified state history.
   276  func (r *historyReader) readAccount(address common.Address, historyID uint64) ([]byte, error) {
   277  	metadata, err := r.readAccountMetadata(address, historyID)
   278  	if err != nil {
   279  		return nil, err
   280  	}
   281  	length := int(metadata[common.AddressLength])                                                     // one byte for account data length
   282  	offset := int(binary.BigEndian.Uint32(metadata[common.AddressLength+1 : common.AddressLength+5])) // four bytes for the account data offset
   283  
   284  	// TODO(rj493456442) optimize it with partial read
   285  	data := rawdb.ReadStateAccountHistory(r.freezer, historyID)
   286  	if len(data) < length+offset {
   287  		return nil, fmt.Errorf("account data is truncated, address: %#x, historyID: %d, size: %d, offset: %d, len: %d", address, historyID, len(data), offset, length)
   288  	}
   289  	return data[offset : offset+length], nil
   290  }
   291  
   292  // readStorage retrieves the storage slot data from the specified state history.
   293  func (r *historyReader) readStorage(address common.Address, storageKey common.Hash, storageHash common.Hash, historyID uint64) ([]byte, error) {
   294  	metadata, err := r.readAccountMetadata(address, historyID)
   295  	if err != nil {
   296  		return nil, err
   297  	}
   298  	// slotIndexOffset:
   299  	//   The offset of storage indices associated with the specified account.
   300  	// slotIndexNumber:
   301  	//   The number of storage indices associated with the specified account.
   302  	slotIndexOffset := int(binary.BigEndian.Uint32(metadata[common.AddressLength+5 : common.AddressLength+9]))
   303  	slotIndexNumber := int(binary.BigEndian.Uint32(metadata[common.AddressLength+9 : common.AddressLength+13]))
   304  
   305  	slotMetadata, err := r.readStorageMetadata(storageKey, storageHash, historyID, slotIndexOffset, slotIndexNumber)
   306  	if err != nil {
   307  		return nil, err
   308  	}
   309  	length := int(slotMetadata[common.HashLength])                                                  // one byte for slot data length
   310  	offset := int(binary.BigEndian.Uint32(slotMetadata[common.HashLength+1 : common.HashLength+5])) // four bytes for slot data offset
   311  
   312  	// TODO(rj493456442) optimize it with partial read
   313  	data := rawdb.ReadStateStorageHistory(r.freezer, historyID)
   314  	if len(data) < offset+length {
   315  		return nil, fmt.Errorf("storage data is truncated, address: %#x, key: %#x, historyID: %d, size: %d, offset: %d, len: %d", address, storageKey, historyID, len(data), offset, length)
   316  	}
   317  	return data[offset : offset+length], nil
   318  }
   319  
   320  // read retrieves the state element data associated with the stateID.
   321  // stateID: represents the ID of the state of the specified version;
   322  // lastID: represents the ID of the latest/newest state history;
   323  // latestValue: represents the state value at the current disk layer with ID == lastID;
   324  func (r *historyReader) read(state stateIdentQuery, stateID uint64, lastID uint64, latestValue []byte) ([]byte, error) {
   325  	tail, err := r.freezer.Tail()
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  	// stateID == tail is allowed, as the first history object preserved
   330  	// is tail+1
   331  	if stateID < tail {
   332  		return nil, errors.New("historical state has been pruned")
   333  	}
   334  
   335  	// To serve the request, all state histories from stateID+1 to lastID
   336  	// must be indexed. It's not supposed to happen unless system is very
   337  	// wrong.
   338  	metadata := loadIndexMetadata(r.disk)
   339  	if metadata == nil || metadata.Last < lastID {
   340  		indexed := "null"
   341  		if metadata != nil {
   342  			indexed = fmt.Sprintf("%d", metadata.Last)
   343  		}
   344  		return nil, fmt.Errorf("state history is not fully indexed, requested: %d, indexed: %s", stateID, indexed)
   345  	}
   346  
   347  	// Construct the index reader to locate the corresponding history for
   348  	// state retrieval
   349  	ir, ok := r.readers[state.String()]
   350  	if !ok {
   351  		ir, err = newIndexReaderWithLimitTag(r.disk, state.stateIdent)
   352  		if err != nil {
   353  			return nil, err
   354  		}
   355  		r.readers[state.String()] = ir
   356  	}
   357  	historyID, err := ir.readGreaterThan(stateID, lastID)
   358  	if err != nil {
   359  		return nil, err
   360  	}
   361  	// The state was not found in the state histories, as it has not been modified
   362  	// since stateID. Use the data from the associated disk layer instead.
   363  	if historyID == math.MaxUint64 {
   364  		return latestValue, nil
   365  	}
   366  	// Resolve data from the specified state history object. Notably, since the history
   367  	// reader operates completely asynchronously with the indexer/unindexer, it's possible
   368  	// that the associated state histories are no longer available due to a rollback.
   369  	// Such truncation should be captured by the state resolver below, rather than returning
   370  	// invalid data.
   371  	if state.account {
   372  		return r.readAccount(state.address, historyID)
   373  	}
   374  	return r.readStorage(state.address, state.storageKey, state.storageHash, historyID)
   375  }