github.com/ethereum/go-ethereum@v1.16.1/triedb/pathdb/states.go (about) 1 // Copyright 2024 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package pathdb 18 19 import ( 20 "fmt" 21 "io" 22 "maps" 23 "slices" 24 "sync" 25 26 "github.com/VictoriaMetrics/fastcache" 27 "github.com/ethereum/go-ethereum/common" 28 "github.com/ethereum/go-ethereum/core/rawdb" 29 "github.com/ethereum/go-ethereum/ethdb" 30 "github.com/ethereum/go-ethereum/log" 31 "github.com/ethereum/go-ethereum/metrics" 32 "github.com/ethereum/go-ethereum/rlp" 33 ) 34 35 // counter helps in tracking items and their corresponding sizes. 36 type counter struct { 37 n int 38 size int 39 } 40 41 // add size to the counter and increase the item counter. 42 func (c *counter) add(size int) { 43 c.n++ 44 c.size += size 45 } 46 47 // report uploads the cached statistics to meters. 48 func (c *counter) report(count, size *metrics.Meter) { 49 count.Mark(int64(c.n)) 50 size.Mark(int64(c.size)) 51 } 52 53 // stateSet represents a collection of state modifications associated with a 54 // transition (e.g., a block execution) or multiple aggregated transitions. 55 // 56 // A stateSet can only reside within a diffLayer or the buffer of a diskLayer, 57 // serving as the envelope for the set. Lock protection is not required for 58 // accessing or mutating the account set and storage set, as the associated 59 // envelope is always marked as stale before any mutation is applied. Any 60 // subsequent state access will be denied due to the stale flag. Therefore, 61 // state access and mutation won't happen at the same time with guarantee. 62 type stateSet struct { 63 accountData map[common.Hash][]byte // Keyed accounts for direct retrieval (nil means deleted) 64 storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrieval. one per account (nil means deleted) 65 size uint64 // Memory size of the state data (accountData and storageData) 66 67 accountListSorted []common.Hash // List of account for iteration. If it exists, it's sorted, otherwise it's nil 68 storageListSorted map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account. Any existing lists are sorted if non-nil 69 70 rawStorageKey bool // indicates whether the storage set uses the raw slot key or the hash 71 72 // Lock for guarding the two lists above. These lists might be accessed 73 // concurrently and lock protection is essential to avoid concurrent 74 // slice or map read/write. 75 listLock sync.RWMutex 76 } 77 78 // newStates constructs the state set with the provided account and storage data. 79 func newStates(accounts map[common.Hash][]byte, storages map[common.Hash]map[common.Hash][]byte, rawStorageKey bool) *stateSet { 80 // Don't panic for the lazy callers, initialize the nil maps instead. 81 if accounts == nil { 82 accounts = make(map[common.Hash][]byte) 83 } 84 if storages == nil { 85 storages = make(map[common.Hash]map[common.Hash][]byte) 86 } 87 s := &stateSet{ 88 accountData: accounts, 89 storageData: storages, 90 rawStorageKey: rawStorageKey, 91 storageListSorted: make(map[common.Hash][]common.Hash), 92 } 93 s.size = s.check() 94 return s 95 } 96 97 // account returns the account data associated with the specified address hash. 98 func (s *stateSet) account(hash common.Hash) ([]byte, bool) { 99 // If the account is known locally, return it 100 if data, ok := s.accountData[hash]; ok { 101 return data, true 102 } 103 return nil, false // account is unknown in this set 104 } 105 106 // mustAccount returns the account data associated with the specified address 107 // hash. The difference is this function will return an error if the account 108 // is not found. 109 func (s *stateSet) mustAccount(hash common.Hash) ([]byte, error) { 110 // If the account is known locally, return it 111 if data, ok := s.accountData[hash]; ok { 112 return data, nil 113 } 114 return nil, fmt.Errorf("account is not found, %x", hash) 115 } 116 117 // storage returns the storage slot associated with the specified address hash 118 // and storage key hash. 119 func (s *stateSet) storage(accountHash, storageHash common.Hash) ([]byte, bool) { 120 // If the account is known locally, try to resolve the slot locally 121 if storage, ok := s.storageData[accountHash]; ok { 122 if data, ok := storage[storageHash]; ok { 123 return data, true 124 } 125 } 126 return nil, false // storage is unknown in this set 127 } 128 129 // mustStorage returns the storage slot associated with the specified address 130 // hash and storage key hash. The difference is this function will return an 131 // error if the storage slot is not found. 132 func (s *stateSet) mustStorage(accountHash, storageHash common.Hash) ([]byte, error) { 133 // If the account is known locally, try to resolve the slot locally 134 if storage, ok := s.storageData[accountHash]; ok { 135 if data, ok := storage[storageHash]; ok { 136 return data, nil 137 } 138 } 139 return nil, fmt.Errorf("storage slot is not found, %x %x", accountHash, storageHash) 140 } 141 142 // check sanitizes accounts and storage slots to ensure the data validity. 143 // Additionally, it computes the total memory size occupied by the maps. 144 func (s *stateSet) check() uint64 { 145 var size int 146 for _, blob := range s.accountData { 147 size += common.HashLength + len(blob) 148 } 149 for accountHash, slots := range s.storageData { 150 if slots == nil { 151 panic(fmt.Sprintf("storage %#x nil", accountHash)) // nil slots is not permitted 152 } 153 for _, blob := range slots { 154 size += 2*common.HashLength + len(blob) 155 } 156 } 157 return uint64(size) 158 } 159 160 // accountList returns a sorted list of all accounts in this state set, including 161 // the deleted ones. 162 // 163 // Note, the returned slice is not a copy, so do not modify it. 164 func (s *stateSet) accountList() []common.Hash { 165 // If an old list already exists, return it 166 s.listLock.RLock() 167 list := s.accountListSorted 168 s.listLock.RUnlock() 169 170 if list != nil { 171 return list 172 } 173 // No old sorted account list exists, generate a new one. It's possible that 174 // multiple threads waiting for the write lock may regenerate the list 175 // multiple times, which is acceptable. 176 s.listLock.Lock() 177 defer s.listLock.Unlock() 178 179 list = slices.SortedFunc(maps.Keys(s.accountData), common.Hash.Cmp) 180 s.accountListSorted = list 181 return list 182 } 183 184 // StorageList returns a sorted list of all storage slot hashes in this state set 185 // for the given account. The returned list will include the hash of deleted 186 // storage slot. 187 // 188 // Note, the returned slice is not a copy, so do not modify it. 189 func (s *stateSet) storageList(accountHash common.Hash) []common.Hash { 190 s.listLock.RLock() 191 if _, ok := s.storageData[accountHash]; !ok { 192 // Account not tracked by this layer 193 s.listLock.RUnlock() 194 return nil 195 } 196 // If an old list already exists, return it 197 if list, exist := s.storageListSorted[accountHash]; exist { 198 s.listLock.RUnlock() 199 return list // the cached list can't be nil 200 } 201 s.listLock.RUnlock() 202 203 // No old sorted account list exists, generate a new one. It's possible that 204 // multiple threads waiting for the write lock may regenerate the list 205 // multiple times, which is acceptable. 206 s.listLock.Lock() 207 defer s.listLock.Unlock() 208 209 list := slices.SortedFunc(maps.Keys(s.storageData[accountHash]), common.Hash.Cmp) 210 s.storageListSorted[accountHash] = list 211 return list 212 } 213 214 // clearLists invalidates the cached account list and storage lists. 215 func (s *stateSet) clearLists() { 216 s.listLock.Lock() 217 defer s.listLock.Unlock() 218 219 s.accountListSorted = nil 220 s.storageListSorted = make(map[common.Hash][]common.Hash) 221 } 222 223 // merge integrates the accounts and storages from the external set into the 224 // local set, ensuring the combined set reflects the combined state of both. 225 // 226 // The stateSet supplied as parameter set will not be mutated by this operation, 227 // as it may still be referenced by other layers. 228 func (s *stateSet) merge(other *stateSet) { 229 var ( 230 delta int 231 accountOverwrites counter 232 storageOverwrites counter 233 ) 234 // Apply the updated account data 235 for accountHash, data := range other.accountData { 236 if origin, ok := s.accountData[accountHash]; ok { 237 delta += len(data) - len(origin) 238 accountOverwrites.add(common.HashLength + len(origin)) 239 } else { 240 delta += common.HashLength + len(data) 241 } 242 s.accountData[accountHash] = data 243 } 244 // Apply all the updated storage slots (individually) 245 for accountHash, storage := range other.storageData { 246 // If storage didn't exist in the set, overwrite blindly 247 if _, ok := s.storageData[accountHash]; !ok { 248 // To prevent potential concurrent map read/write issues, allocate a 249 // new map for the storage instead of claiming it directly from the 250 // passed external set. Even after merging, the slots belonging to the 251 // external state set remain accessible, so ownership of the map should 252 // not be taken, and any mutation on it should be avoided. 253 slots := make(map[common.Hash][]byte, len(storage)) 254 for storageHash, data := range storage { 255 slots[storageHash] = data 256 delta += 2*common.HashLength + len(data) 257 } 258 s.storageData[accountHash] = slots 259 continue 260 } 261 // Storage exists in both local and external set, merge the slots 262 slots := s.storageData[accountHash] 263 for storageHash, data := range storage { 264 if origin, ok := slots[storageHash]; ok { 265 delta += len(data) - len(origin) 266 storageOverwrites.add(2*common.HashLength + len(origin)) 267 } else { 268 delta += 2*common.HashLength + len(data) 269 } 270 slots[storageHash] = data 271 } 272 } 273 accountOverwrites.report(gcAccountMeter, gcAccountBytesMeter) 274 storageOverwrites.report(gcStorageMeter, gcStorageBytesMeter) 275 s.clearLists() 276 s.updateSize(delta) 277 } 278 279 // revertTo takes the original value of accounts and storages as input and reverts 280 // the latest state transition applied on the state set. 281 // 282 // Notably, this operation may result in the set containing more entries after a 283 // revert. For example, if account x did not exist and was created during transition 284 // w, reverting w will retain an x=nil entry in the set. And also if account x along 285 // with its storage slots was deleted in the transition w, reverting w will retain 286 // a list of additional storage slots with their original value. 287 func (s *stateSet) revertTo(accountOrigin map[common.Hash][]byte, storageOrigin map[common.Hash]map[common.Hash][]byte) { 288 var delta int // size tracking 289 for addrHash, blob := range accountOrigin { 290 data, ok := s.accountData[addrHash] 291 if !ok { 292 panic(fmt.Sprintf("non-existent account for reverting, %x", addrHash)) 293 } 294 if len(data) == 0 && len(blob) == 0 { 295 panic(fmt.Sprintf("invalid account mutation (null to null), %x", addrHash)) 296 } 297 delta += len(blob) - len(data) 298 s.accountData[addrHash] = blob 299 } 300 // Overwrite the storage data with original value blindly 301 for addrHash, storage := range storageOrigin { 302 slots := s.storageData[addrHash] 303 if len(slots) == 0 { 304 panic(fmt.Sprintf("non-existent storage set for reverting, %x", addrHash)) 305 } 306 for storageHash, blob := range storage { 307 data, ok := slots[storageHash] 308 if !ok { 309 panic(fmt.Sprintf("non-existent storage slot for reverting, %x-%x", addrHash, storageHash)) 310 } 311 if len(blob) == 0 && len(data) == 0 { 312 panic(fmt.Sprintf("invalid storage slot mutation (null to null), %x-%x", addrHash, storageHash)) 313 } 314 delta += len(blob) - len(data) 315 slots[storageHash] = blob 316 } 317 } 318 s.clearLists() 319 s.updateSize(delta) 320 } 321 322 // updateSize updates the total cache size by the given delta. 323 func (s *stateSet) updateSize(delta int) { 324 size := int64(s.size) + int64(delta) 325 if size >= 0 { 326 s.size = uint64(size) 327 return 328 } 329 log.Error("Stateset size underflow", "prev", common.StorageSize(s.size), "delta", common.StorageSize(delta)) 330 s.size = 0 331 } 332 333 // encode serializes the content of state set into the provided writer. 334 func (s *stateSet) encode(w io.Writer) error { 335 // Encode accounts 336 if err := rlp.Encode(w, s.rawStorageKey); err != nil { 337 return err 338 } 339 type accounts struct { 340 AddrHashes []common.Hash 341 Accounts [][]byte 342 } 343 var enc accounts 344 for addrHash, blob := range s.accountData { 345 enc.AddrHashes = append(enc.AddrHashes, addrHash) 346 enc.Accounts = append(enc.Accounts, blob) 347 } 348 if err := rlp.Encode(w, enc); err != nil { 349 return err 350 } 351 // Encode storages 352 type Storage struct { 353 AddrHash common.Hash 354 Keys []common.Hash 355 Vals [][]byte 356 } 357 storages := make([]Storage, 0, len(s.storageData)) 358 for addrHash, slots := range s.storageData { 359 keys := make([]common.Hash, 0, len(slots)) 360 vals := make([][]byte, 0, len(slots)) 361 for key, val := range slots { 362 keys = append(keys, key) 363 vals = append(vals, val) 364 } 365 storages = append(storages, Storage{ 366 AddrHash: addrHash, 367 Keys: keys, 368 Vals: vals, 369 }) 370 } 371 return rlp.Encode(w, storages) 372 } 373 374 // decode deserializes the content from the rlp stream into the state set. 375 func (s *stateSet) decode(r *rlp.Stream) error { 376 if err := r.Decode(&s.rawStorageKey); err != nil { 377 return fmt.Errorf("load diff raw storage key flag: %v", err) 378 } 379 type accounts struct { 380 AddrHashes []common.Hash 381 Accounts [][]byte 382 } 383 var ( 384 dec accounts 385 accountSet = make(map[common.Hash][]byte) 386 ) 387 if err := r.Decode(&dec); err != nil { 388 return fmt.Errorf("load diff accounts: %v", err) 389 } 390 for i := range dec.AddrHashes { 391 accountSet[dec.AddrHashes[i]] = empty2nil(dec.Accounts[i]) 392 } 393 s.accountData = accountSet 394 395 // Decode storages 396 type storage struct { 397 AddrHash common.Hash 398 Keys []common.Hash 399 Vals [][]byte 400 } 401 var ( 402 storages []storage 403 storageSet = make(map[common.Hash]map[common.Hash][]byte) 404 ) 405 if err := r.Decode(&storages); err != nil { 406 return fmt.Errorf("load diff storage: %v", err) 407 } 408 for _, entry := range storages { 409 storageSet[entry.AddrHash] = make(map[common.Hash][]byte, len(entry.Keys)) 410 for i := range entry.Keys { 411 storageSet[entry.AddrHash][entry.Keys[i]] = empty2nil(entry.Vals[i]) 412 } 413 } 414 s.storageData = storageSet 415 s.storageListSorted = make(map[common.Hash][]common.Hash) 416 417 s.size = s.check() 418 return nil 419 } 420 421 // write flushes state mutations into the provided database batch as a whole. 422 func (s *stateSet) write(batch ethdb.Batch, genMarker []byte, clean *fastcache.Cache) (int, int) { 423 return writeStates(batch, genMarker, s.accountData, s.storageData, clean) 424 } 425 426 // reset clears all cached state data, including any optional sorted lists that 427 // may have been generated. 428 func (s *stateSet) reset() { 429 s.accountData = make(map[common.Hash][]byte) 430 s.storageData = make(map[common.Hash]map[common.Hash][]byte) 431 s.size = 0 432 s.accountListSorted = nil 433 s.storageListSorted = make(map[common.Hash][]common.Hash) 434 } 435 436 // dbsize returns the approximate size for db write. 437 func (s *stateSet) dbsize() int { 438 m := len(s.accountData) * len(rawdb.SnapshotAccountPrefix) 439 for _, slots := range s.storageData { 440 m += len(slots) * len(rawdb.SnapshotStoragePrefix) 441 } 442 return m + int(s.size) 443 } 444 445 // StateSetWithOrigin wraps the state set with additional original values of the 446 // mutated states. 447 type StateSetWithOrigin struct { 448 *stateSet 449 450 // accountOrigin represents the account data before the state transition, 451 // corresponding to both the accountData and destructSet. It's keyed by the 452 // account address. The nil value means the account was not present before. 453 accountOrigin map[common.Address][]byte 454 455 // storageOrigin represents the storage data before the state transition, 456 // corresponding to storageData and deleted slots of destructSet. It's keyed 457 // by the account address and slot key hash. The nil value means the slot was 458 // not present. 459 storageOrigin map[common.Address]map[common.Hash][]byte 460 461 // memory size of the state data (accountOrigin and storageOrigin) 462 size uint64 463 } 464 465 // NewStateSetWithOrigin constructs the state set with the provided data. 466 func NewStateSetWithOrigin(accounts map[common.Hash][]byte, storages map[common.Hash]map[common.Hash][]byte, accountOrigin map[common.Address][]byte, storageOrigin map[common.Address]map[common.Hash][]byte, rawStorageKey bool) *StateSetWithOrigin { 467 // Don't panic for the lazy callers, initialize the nil maps instead. 468 if accountOrigin == nil { 469 accountOrigin = make(map[common.Address][]byte) 470 } 471 if storageOrigin == nil { 472 storageOrigin = make(map[common.Address]map[common.Hash][]byte) 473 } 474 // Count the memory size occupied by the set. Note that each slot key here 475 // uses 2*common.HashLength to keep consistent with the calculation method 476 // of stateSet. 477 var size int 478 for _, data := range accountOrigin { 479 size += common.HashLength + len(data) 480 } 481 for _, slots := range storageOrigin { 482 for _, data := range slots { 483 size += 2*common.HashLength + len(data) 484 } 485 } 486 set := newStates(accounts, storages, rawStorageKey) 487 return &StateSetWithOrigin{ 488 stateSet: set, 489 accountOrigin: accountOrigin, 490 storageOrigin: storageOrigin, 491 size: set.size + uint64(size), 492 } 493 } 494 495 // encode serializes the content of state set into the provided writer. 496 func (s *StateSetWithOrigin) encode(w io.Writer) error { 497 // Encode state set 498 if err := s.stateSet.encode(w); err != nil { 499 return err 500 } 501 // Encode accounts 502 type Accounts struct { 503 Addresses []common.Address 504 Accounts [][]byte 505 } 506 var accounts Accounts 507 for address, blob := range s.accountOrigin { 508 accounts.Addresses = append(accounts.Addresses, address) 509 accounts.Accounts = append(accounts.Accounts, blob) 510 } 511 if err := rlp.Encode(w, accounts); err != nil { 512 return err 513 } 514 // Encode storages 515 type Storage struct { 516 Address common.Address 517 Keys []common.Hash 518 Vals [][]byte 519 } 520 storages := make([]Storage, 0, len(s.storageOrigin)) 521 for address, slots := range s.storageOrigin { 522 keys := make([]common.Hash, 0, len(slots)) 523 vals := make([][]byte, 0, len(slots)) 524 for key, val := range slots { 525 keys = append(keys, key) 526 vals = append(vals, val) 527 } 528 storages = append(storages, Storage{Address: address, Keys: keys, Vals: vals}) 529 } 530 return rlp.Encode(w, storages) 531 } 532 533 // decode deserializes the content from the rlp stream into the state set. 534 func (s *StateSetWithOrigin) decode(r *rlp.Stream) error { 535 if s.stateSet == nil { 536 s.stateSet = &stateSet{} 537 } 538 if err := s.stateSet.decode(r); err != nil { 539 return err 540 } 541 // Decode account origin 542 type Accounts struct { 543 Addresses []common.Address 544 Accounts [][]byte 545 } 546 var ( 547 accounts Accounts 548 accountSet = make(map[common.Address][]byte) 549 ) 550 if err := r.Decode(&accounts); err != nil { 551 return fmt.Errorf("load diff account origin set: %v", err) 552 } 553 for i := range accounts.Accounts { 554 accountSet[accounts.Addresses[i]] = empty2nil(accounts.Accounts[i]) 555 } 556 s.accountOrigin = accountSet 557 558 // Decode storage origin 559 type Storage struct { 560 Address common.Address 561 Keys []common.Hash 562 Vals [][]byte 563 } 564 var ( 565 storages []Storage 566 storageSet = make(map[common.Address]map[common.Hash][]byte) 567 ) 568 if err := r.Decode(&storages); err != nil { 569 return fmt.Errorf("load diff storage origin: %v", err) 570 } 571 for _, storage := range storages { 572 storageSet[storage.Address] = make(map[common.Hash][]byte) 573 for i := range storage.Keys { 574 storageSet[storage.Address][storage.Keys[i]] = empty2nil(storage.Vals[i]) 575 } 576 } 577 s.storageOrigin = storageSet 578 return nil 579 } 580 581 func empty2nil(b []byte) []byte { 582 if len(b) == 0 { 583 return nil 584 } 585 return b 586 }