github.com/ethereum/go-ethereum@v1.16.1/triedb/pathdb/history_reader.go (about) 1 // Copyright 2025 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/ 16 17 package pathdb 18 19 import ( 20 "bytes" 21 "encoding/binary" 22 "errors" 23 "fmt" 24 "math" 25 "sort" 26 27 "github.com/ethereum/go-ethereum/common" 28 "github.com/ethereum/go-ethereum/core/rawdb" 29 "github.com/ethereum/go-ethereum/ethdb" 30 ) 31 32 // stateIdent represents the identifier of a state element, which can be 33 // either an account or a storage slot. 34 type stateIdent struct { 35 account bool 36 37 // The hash of the account address. This is used instead of the raw account 38 // address is to align the traversal order with the Merkle-Patricia-Trie. 39 addressHash common.Hash 40 41 // The hash of the storage slot key. This is used instead of the raw slot key 42 // because, in legacy state histories (prior to the Cancun fork), the slot 43 // identifier is the hash of the key, and the original key (preimage) cannot 44 // be recovered. To maintain backward compatibility, the key hash is used. 45 // 46 // Meanwhile, using the storage key hash also preserve the traversal order 47 // with Merkle-Patricia-Trie. 48 // 49 // This field is null if the identifier refers to account data. 50 storageHash common.Hash 51 } 52 53 // String returns the string format state identifier. 54 func (ident stateIdent) String() string { 55 if ident.account { 56 return ident.addressHash.Hex() 57 } 58 return ident.addressHash.Hex() + ident.storageHash.Hex() 59 } 60 61 // newAccountIdent constructs a state identifier for an account. 62 func newAccountIdent(addressHash common.Hash) stateIdent { 63 return stateIdent{ 64 account: true, 65 addressHash: addressHash, 66 } 67 } 68 69 // newStorageIdent constructs a state identifier for a storage slot. 70 // The address denotes the address of the associated account; 71 // the storageHash denotes the hash of the raw storage slot key; 72 func newStorageIdent(addressHash common.Hash, storageHash common.Hash) stateIdent { 73 return stateIdent{ 74 addressHash: addressHash, 75 storageHash: storageHash, 76 } 77 } 78 79 // stateIdentQuery is the extension of stateIdent by adding the raw storage key. 80 type stateIdentQuery struct { 81 stateIdent 82 83 address common.Address 84 storageKey common.Hash 85 } 86 87 // newAccountIdentQuery constructs a state identifier for an account. 88 func newAccountIdentQuery(address common.Address, addressHash common.Hash) stateIdentQuery { 89 return stateIdentQuery{ 90 stateIdent: stateIdent{ 91 account: true, 92 addressHash: addressHash, 93 }, 94 address: address, 95 } 96 } 97 98 // newStorageIdentQuery constructs a state identifier for a storage slot. 99 // the address denotes the address of the associated account; 100 // the addressHash denotes the address hash of the associated account; 101 // the storageKey denotes the raw storage slot key; 102 // the storageHash denotes the hash of the raw storage slot key; 103 func newStorageIdentQuery(address common.Address, addressHash common.Hash, storageKey common.Hash, storageHash common.Hash) stateIdentQuery { 104 return stateIdentQuery{ 105 stateIdent: stateIdent{ 106 addressHash: addressHash, 107 storageHash: storageHash, 108 }, 109 address: address, 110 storageKey: storageKey, 111 } 112 } 113 114 // indexReaderWithLimitTag is a wrapper around indexReader that includes an 115 // additional index position. This position represents the ID of the last 116 // indexed state history at the time the reader was created, implying that 117 // indexes beyond this position are unavailable. 118 type indexReaderWithLimitTag struct { 119 reader *indexReader 120 limit uint64 121 db ethdb.KeyValueReader 122 } 123 124 // newIndexReaderWithLimitTag constructs a index reader with indexing position. 125 func newIndexReaderWithLimitTag(db ethdb.KeyValueReader, state stateIdent) (*indexReaderWithLimitTag, error) { 126 // Read the last indexed ID before the index reader construction 127 metadata := loadIndexMetadata(db) 128 if metadata == nil { 129 return nil, errors.New("state history hasn't been indexed yet") 130 } 131 r, err := newIndexReader(db, state) 132 if err != nil { 133 return nil, err 134 } 135 return &indexReaderWithLimitTag{ 136 reader: r, 137 limit: metadata.Last, 138 db: db, 139 }, nil 140 } 141 142 // readGreaterThan locates the first element that is greater than the specified 143 // id. If no such element is found, MaxUint64 is returned. 144 // 145 // Note: It is possible that additional histories have been indexed since the 146 // reader was created. The reader should be refreshed as needed to load the 147 // latest indexed data from disk. 148 func (r *indexReaderWithLimitTag) readGreaterThan(id uint64, lastID uint64) (uint64, error) { 149 // Mark the index reader as stale if the tracked indexing position moves 150 // backward. This can occur if the pathdb is reverted and certain state 151 // histories are unindexed. For simplicity, the reader is marked as stale 152 // instead of being refreshed, as this scenario is highly unlikely. 153 if r.limit > lastID { 154 return 0, fmt.Errorf("index reader is stale, limit: %d, last-state-id: %d", r.limit, lastID) 155 } 156 // Try to find the element which is greater than the specified target 157 res, err := r.reader.readGreaterThan(id) 158 if err != nil { 159 return 0, err 160 } 161 // Short circuit if the element is found within the current index 162 if res != math.MaxUint64 { 163 return res, nil 164 } 165 // The element was not found, and no additional histories have been indexed. 166 // Return a not-found result. 167 if r.limit == lastID { 168 return res, nil 169 } 170 // Refresh the index reader and attempt again. If the latest indexed position 171 // is even below the ID of the disk layer, it indicates that state histories 172 // are being removed. In this case, it would theoretically be better to block 173 // the state rollback operation synchronously until all readers are released. 174 // Given that it's very unlikely to occur and users try to perform historical 175 // state queries while reverting the states at the same time. Simply returning 176 // an error should be sufficient for now. 177 metadata := loadIndexMetadata(r.db) 178 if metadata == nil || metadata.Last < lastID { 179 return 0, errors.New("state history hasn't been indexed yet") 180 } 181 if err := r.reader.refresh(); err != nil { 182 return 0, err 183 } 184 r.limit = metadata.Last 185 186 return r.reader.readGreaterThan(id) 187 } 188 189 // historyReader is the structure to access historic state data. 190 type historyReader struct { 191 disk ethdb.KeyValueReader 192 freezer ethdb.AncientReader 193 readers map[string]*indexReaderWithLimitTag 194 } 195 196 // newHistoryReader constructs the history reader with the supplied db. 197 func newHistoryReader(disk ethdb.KeyValueReader, freezer ethdb.AncientReader) *historyReader { 198 return &historyReader{ 199 disk: disk, 200 freezer: freezer, 201 readers: make(map[string]*indexReaderWithLimitTag), 202 } 203 } 204 205 // readAccountMetadata resolves the account metadata within the specified 206 // state history. 207 func (r *historyReader) readAccountMetadata(address common.Address, historyID uint64) ([]byte, error) { 208 blob := rawdb.ReadStateAccountIndex(r.freezer, historyID) 209 if len(blob) == 0 { 210 return nil, fmt.Errorf("account index is truncated, historyID: %d", historyID) 211 } 212 if len(blob)%accountIndexSize != 0 { 213 return nil, fmt.Errorf("account index is corrupted, historyID: %d, size: %d", historyID, len(blob)) 214 } 215 n := len(blob) / accountIndexSize 216 217 pos := sort.Search(n, func(i int) bool { 218 h := blob[accountIndexSize*i : accountIndexSize*i+common.HashLength] 219 return bytes.Compare(h, address.Bytes()) >= 0 220 }) 221 if pos == n { 222 return nil, fmt.Errorf("account %#x is not found", address) 223 } 224 offset := accountIndexSize * pos 225 if address != common.BytesToAddress(blob[offset:offset+common.AddressLength]) { 226 return nil, fmt.Errorf("account %#x is not found", address) 227 } 228 return blob[offset : accountIndexSize*(pos+1)], nil 229 } 230 231 // readStorageMetadata resolves the storage slot metadata within the specified 232 // state history. 233 func (r *historyReader) readStorageMetadata(storageKey common.Hash, storageHash common.Hash, historyID uint64, slotOffset, slotNumber int) ([]byte, error) { 234 // TODO(rj493456442) optimize it with partial read 235 blob := rawdb.ReadStateStorageIndex(r.freezer, historyID) 236 if len(blob) == 0 { 237 return nil, fmt.Errorf("storage index is truncated, historyID: %d", historyID) 238 } 239 if len(blob)%slotIndexSize != 0 { 240 return nil, fmt.Errorf("storage indices is corrupted, historyID: %d, size: %d", historyID, len(blob)) 241 } 242 if slotIndexSize*(slotOffset+slotNumber) > len(blob) { 243 return nil, fmt.Errorf("storage indices is truncated, historyID: %d, size: %d, offset: %d, length: %d", historyID, len(blob), slotOffset, slotNumber) 244 } 245 subSlice := blob[slotIndexSize*slotOffset : slotIndexSize*(slotOffset+slotNumber)] 246 247 // TODO(rj493456442) get rid of the metadata resolution 248 var ( 249 m meta 250 target common.Hash 251 ) 252 blob = rawdb.ReadStateHistoryMeta(r.freezer, historyID) 253 if err := m.decode(blob); err != nil { 254 return nil, err 255 } 256 if m.version == stateHistoryV0 { 257 target = storageHash 258 } else { 259 target = storageKey 260 } 261 pos := sort.Search(slotNumber, func(i int) bool { 262 slotID := subSlice[slotIndexSize*i : slotIndexSize*i+common.HashLength] 263 return bytes.Compare(slotID, target.Bytes()) >= 0 264 }) 265 if pos == slotNumber { 266 return nil, fmt.Errorf("storage metadata is not found, slot key: %#x, historyID: %d", storageKey, historyID) 267 } 268 offset := slotIndexSize * pos 269 if target != common.BytesToHash(subSlice[offset:offset+common.HashLength]) { 270 return nil, fmt.Errorf("storage metadata is not found, slot key: %#x, historyID: %d", storageKey, historyID) 271 } 272 return subSlice[offset : slotIndexSize*(pos+1)], nil 273 } 274 275 // readAccount retrieves the account data from the specified state history. 276 func (r *historyReader) readAccount(address common.Address, historyID uint64) ([]byte, error) { 277 metadata, err := r.readAccountMetadata(address, historyID) 278 if err != nil { 279 return nil, err 280 } 281 length := int(metadata[common.AddressLength]) // one byte for account data length 282 offset := int(binary.BigEndian.Uint32(metadata[common.AddressLength+1 : common.AddressLength+5])) // four bytes for the account data offset 283 284 // TODO(rj493456442) optimize it with partial read 285 data := rawdb.ReadStateAccountHistory(r.freezer, historyID) 286 if len(data) < length+offset { 287 return nil, fmt.Errorf("account data is truncated, address: %#x, historyID: %d, size: %d, offset: %d, len: %d", address, historyID, len(data), offset, length) 288 } 289 return data[offset : offset+length], nil 290 } 291 292 // readStorage retrieves the storage slot data from the specified state history. 293 func (r *historyReader) readStorage(address common.Address, storageKey common.Hash, storageHash common.Hash, historyID uint64) ([]byte, error) { 294 metadata, err := r.readAccountMetadata(address, historyID) 295 if err != nil { 296 return nil, err 297 } 298 // slotIndexOffset: 299 // The offset of storage indices associated with the specified account. 300 // slotIndexNumber: 301 // The number of storage indices associated with the specified account. 302 slotIndexOffset := int(binary.BigEndian.Uint32(metadata[common.AddressLength+5 : common.AddressLength+9])) 303 slotIndexNumber := int(binary.BigEndian.Uint32(metadata[common.AddressLength+9 : common.AddressLength+13])) 304 305 slotMetadata, err := r.readStorageMetadata(storageKey, storageHash, historyID, slotIndexOffset, slotIndexNumber) 306 if err != nil { 307 return nil, err 308 } 309 length := int(slotMetadata[common.HashLength]) // one byte for slot data length 310 offset := int(binary.BigEndian.Uint32(slotMetadata[common.HashLength+1 : common.HashLength+5])) // four bytes for slot data offset 311 312 // TODO(rj493456442) optimize it with partial read 313 data := rawdb.ReadStateStorageHistory(r.freezer, historyID) 314 if len(data) < offset+length { 315 return nil, fmt.Errorf("storage data is truncated, address: %#x, key: %#x, historyID: %d, size: %d, offset: %d, len: %d", address, storageKey, historyID, len(data), offset, length) 316 } 317 return data[offset : offset+length], nil 318 } 319 320 // read retrieves the state element data associated with the stateID. 321 // stateID: represents the ID of the state of the specified version; 322 // lastID: represents the ID of the latest/newest state history; 323 // latestValue: represents the state value at the current disk layer with ID == lastID; 324 func (r *historyReader) read(state stateIdentQuery, stateID uint64, lastID uint64, latestValue []byte) ([]byte, error) { 325 tail, err := r.freezer.Tail() 326 if err != nil { 327 return nil, err 328 } 329 // stateID == tail is allowed, as the first history object preserved 330 // is tail+1 331 if stateID < tail { 332 return nil, errors.New("historical state has been pruned") 333 } 334 335 // To serve the request, all state histories from stateID+1 to lastID 336 // must be indexed. It's not supposed to happen unless system is very 337 // wrong. 338 metadata := loadIndexMetadata(r.disk) 339 if metadata == nil || metadata.Last < lastID { 340 indexed := "null" 341 if metadata != nil { 342 indexed = fmt.Sprintf("%d", metadata.Last) 343 } 344 return nil, fmt.Errorf("state history is not fully indexed, requested: %d, indexed: %s", stateID, indexed) 345 } 346 347 // Construct the index reader to locate the corresponding history for 348 // state retrieval 349 ir, ok := r.readers[state.String()] 350 if !ok { 351 ir, err = newIndexReaderWithLimitTag(r.disk, state.stateIdent) 352 if err != nil { 353 return nil, err 354 } 355 r.readers[state.String()] = ir 356 } 357 historyID, err := ir.readGreaterThan(stateID, lastID) 358 if err != nil { 359 return nil, err 360 } 361 // The state was not found in the state histories, as it has not been modified 362 // since stateID. Use the data from the associated disk layer instead. 363 if historyID == math.MaxUint64 { 364 return latestValue, nil 365 } 366 // Resolve data from the specified state history object. Notably, since the history 367 // reader operates completely asynchronously with the indexer/unindexer, it's possible 368 // that the associated state histories are no longer available due to a rollback. 369 // Such truncation should be captured by the state resolver below, rather than returning 370 // invalid data. 371 if state.account { 372 return r.readAccount(state.address, historyID) 373 } 374 return r.readStorage(state.address, state.storageKey, state.storageHash, historyID) 375 }