github.com/ethereum/go-ethereum@v1.16.1/triedb/pathdb/disklayer.go

// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package pathdb

import (
	"bytes"
	"fmt"
	"sync"
	"time"

	"github.com/VictoriaMetrics/fastcache"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/log"
)

// diskLayer is a low-level persistent layer built on top of a key-value store.
type diskLayer struct {
	root common.Hash // Immutable, root hash of the state this layer was made for
	id   uint64      // Immutable, corresponding state id
	db   *Database   // Path-based trie database

	// These two caches must be maintained separately, because the key
	// for the root node of the storage trie (accountHash) is identical
	// to the key for the account data.
	nodes  *fastcache.Cache // GC friendly memory cache of clean nodes
	states *fastcache.Cache // GC friendly memory cache of clean states

	buffer *buffer // Live buffer to aggregate writes
	frozen *buffer // Frozen node buffer waiting for flushing

	stale bool         // Signals that the layer became stale (state progressed)
	lock  sync.RWMutex // Lock used to protect the stale flag and genMarker

	// The generator is set if the state snapshot was not fully completed,
	// regardless of whether the background generation is running or not.
	// It should only be unset once the generation completes.
	generator *generator
}

// newDiskLayer creates a new disk layer based on the passed arguments.
func newDiskLayer(root common.Hash, id uint64, db *Database, nodes *fastcache.Cache, states *fastcache.Cache, buffer *buffer, frozen *buffer) *diskLayer {
	// Initialize the clean caches if the memory allowance is not zero,
	// or reuse the provided caches if they are not nil (inherited from
	// the original disk layer).
	if nodes == nil && db.config.TrieCleanSize != 0 {
		nodes = fastcache.New(db.config.TrieCleanSize)
	}
	if states == nil && db.config.StateCleanSize != 0 {
		states = fastcache.New(db.config.StateCleanSize)
	}
	return &diskLayer{
		root:   root,
		id:     id,
		db:     db,
		nodes:  nodes,
		states: states,
		buffer: buffer,
		frozen: frozen,
	}
}
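
// A minimal sketch of why the two clean caches above cannot be merged,
// assuming nodeCacheKey simply prefixes the node path with the owner hash
// (an assumption for illustration, not a statement about its actual code):
// the key of a storage trie's root node (empty path) collapses to the owner
// hash alone, which is byte-identical to the key used for that account's
// data in the state cache.
//
//	owner := common.HexToHash("0x01")   // hypothetical account hash
//	nodeKey := nodeCacheKey(owner, nil) // assumed to equal owner[:]
//	stateKey := owner[:]                // key of the account data in `states`
//	// bytes.Equal(nodeKey, stateKey) would be true, hence two caches.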

// rootHash implements the layer interface, returning the root hash of the
// corresponding state.
func (dl *diskLayer) rootHash() common.Hash {
	return dl.root
}

// stateID implements the layer interface, returning the state id of the disk layer.
func (dl *diskLayer) stateID() uint64 {
	return dl.id
}

// parentLayer implements the layer interface, returning nil as there's no layer
// below the disk.
func (dl *diskLayer) parentLayer() layer {
	return nil
}

// setGenerator links the given generator to the disk layer, indicating that
// the associated state snapshot is not yet fully completed and the generation
// is potentially running in the background.
func (dl *diskLayer) setGenerator(generator *generator) {
	dl.generator = generator
}

// markStale sets the stale flag to true.
func (dl *diskLayer) markStale() {
	dl.lock.Lock()
	defer dl.lock.Unlock()

	if dl.stale {
		panic("triedb disk layer is stale") // we've committed into the same base from two children, boom
	}
	dl.stale = true
}

// node implements the layer interface, retrieving the trie node with the
// provided node info. No error will be returned if the node is not found.
func (dl *diskLayer) node(owner common.Hash, path []byte, depth int) ([]byte, common.Hash, *nodeLoc, error) {
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	if dl.stale {
		return nil, common.Hash{}, nil, errSnapshotStale
	}
	// Try to retrieve the trie node from the not-yet-written node buffer first
	// (both the live one and the frozen one). Note the buffer is lock free since
	// it's impossible to mutate the buffer before tagging the layer as stale.
	for _, buffer := range []*buffer{dl.buffer, dl.frozen} {
		if buffer != nil {
			n, found := buffer.node(owner, path)
			if found {
				dirtyNodeHitMeter.Mark(1)
				dirtyNodeReadMeter.Mark(int64(len(n.Blob)))
				dirtyNodeHitDepthHist.Update(int64(depth))
				return n.Blob, n.Hash, &nodeLoc{loc: locDirtyCache, depth: depth}, nil
			}
		}
	}
	dirtyNodeMissMeter.Mark(1)

	// Try to retrieve the trie node from the clean memory cache
	key := nodeCacheKey(owner, path)
	if dl.nodes != nil {
		if blob := dl.nodes.Get(nil, key); len(blob) > 0 {
			cleanNodeHitMeter.Mark(1)
			cleanNodeReadMeter.Mark(int64(len(blob)))
			return blob, crypto.Keccak256Hash(blob), &nodeLoc{loc: locCleanCache, depth: depth}, nil
		}
		cleanNodeMissMeter.Mark(1)
	}
	// Try to retrieve the trie node from the disk.
	var blob []byte
	if owner == (common.Hash{}) {
		blob = rawdb.ReadAccountTrieNode(dl.db.diskdb, path)
	} else {
		blob = rawdb.ReadStorageTrieNode(dl.db.diskdb, owner, path)
	}
	// Store the resolved data in the clean cache. The background buffer flusher
	// may also write to the clean cache concurrently, but two writers cannot
	// write the same item with different content. If the item already exists,
	// it will be found in the frozen buffer, eliminating the need to check the
	// database.
	if dl.nodes != nil && len(blob) > 0 {
		dl.nodes.Set(key, blob)
		cleanNodeWriteMeter.Mark(int64(len(blob)))
	}
	return blob, crypto.Keccak256Hash(blob), &nodeLoc{loc: locDiskLayer, depth: depth}, nil
}
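
// A minimal read-path sketch: a lookup cascades through the dirty buffers,
// the clean cache and finally the key-value store, and the returned nodeLoc
// reports which tier served the request (the arguments here are illustrative
// only):
//
//	blob, hash, loc, err := dl.node(common.Hash{}, nil, 0) // account trie root
//	if err == nil {
//		switch loc.loc {
//		case locDirtyCache: // hit in the live or frozen buffer
//		case locCleanCache: // hit in the fastcache of clean nodes
//		case locDiskLayer:  // resolved from the key-value store
//		}
//		_, _ = blob, hash
//	}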

// account directly retrieves the account RLP associated with a particular
// hash in the slim data format.
//
// Note the returned account is not a copy, please don't modify it.
func (dl *diskLayer) account(hash common.Hash, depth int) ([]byte, error) {
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	if dl.stale {
		return nil, errSnapshotStale
	}
	// Try to retrieve the account from the not-yet-written state buffer first
	// (both the live one and the frozen one). Note the buffer is lock free since
	// it's impossible to mutate the buffer before tagging the layer as stale.
	for _, buffer := range []*buffer{dl.buffer, dl.frozen} {
		if buffer != nil {
			blob, found := buffer.account(hash)
			if found {
				dirtyStateHitMeter.Mark(1)
				dirtyStateReadMeter.Mark(int64(len(blob)))
				dirtyStateHitDepthHist.Update(int64(depth))

				if len(blob) == 0 {
					stateAccountInexMeter.Mark(1)
				} else {
					stateAccountExistMeter.Mark(1)
				}
				return blob, nil
			}
		}
	}
	dirtyStateMissMeter.Mark(1)

	// If the layer is being generated, ensure the requested account has
	// already been covered by the generator.
	marker := dl.genMarker()
	if marker != nil && bytes.Compare(hash.Bytes(), marker) > 0 {
		return nil, errNotCoveredYet
	}
	// Try to retrieve the account from the memory cache
	if dl.states != nil {
		if blob, found := dl.states.HasGet(nil, hash[:]); found {
			cleanStateHitMeter.Mark(1)
			cleanStateReadMeter.Mark(int64(len(blob)))

			if len(blob) == 0 {
				stateAccountInexMeter.Mark(1)
			} else {
				stateAccountExistMeter.Mark(1)
			}
			return blob, nil
		}
		cleanStateMissMeter.Mark(1)
	}
	// Try to retrieve the account from the disk.
	blob := rawdb.ReadAccountSnapshot(dl.db.diskdb, hash)

	// Store the resolved data in the clean cache. The background buffer flusher
	// may also write to the clean cache concurrently, but two writers cannot
	// write the same item with different content. If the item already exists,
	// it will be found in the frozen buffer, eliminating the need to check the
	// database.
	if dl.states != nil {
		dl.states.Set(hash[:], blob)
		cleanStateWriteMeter.Mark(int64(len(blob)))
	}
	if len(blob) == 0 {
		stateAccountInexMeter.Mark(1)
		stateAccountInexDiskMeter.Mark(1)
	} else {
		stateAccountExistMeter.Mark(1)
		stateAccountExistDiskMeter.Mark(1)
	}
	return blob, nil
}
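
// A worked example of the coverage check in account above: if the generator's
// progress marker currently sits at 0x80..0, an account hashed to 0x7f..f has
// already been regenerated and is served normally, while one hashed to 0x81..0
// compares greater than the marker and yields errNotCoveredYet, forcing the
// caller to fall back to resolving it from the trie. In sketch form:
//
//	marker := dl.genMarker() // e.g. 0x80..0; nil once generation completes
//	covered := marker == nil || bytes.Compare(hash.Bytes(), marker) <= 0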

// storage directly retrieves the storage data associated with a particular hash,
// within a particular account.
//
// Note the returned slot is not a copy, please don't modify it.
func (dl *diskLayer) storage(accountHash, storageHash common.Hash, depth int) ([]byte, error) {
	// Hold the lock to ensure the parent won't be changed while the state
	// is being accessed.
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	if dl.stale {
		return nil, errSnapshotStale
	}
	// Try to retrieve the storage slot from the not-yet-written state buffer
	// first (both the live one and the frozen one). Note the buffer is lock
	// free since it's impossible to mutate the buffer before tagging the
	// layer as stale.
	for _, buffer := range []*buffer{dl.buffer, dl.frozen} {
		if buffer != nil {
			if blob, found := buffer.storage(accountHash, storageHash); found {
				dirtyStateHitMeter.Mark(1)
				dirtyStateReadMeter.Mark(int64(len(blob)))
				dirtyStateHitDepthHist.Update(int64(depth))

				if len(blob) == 0 {
					stateStorageInexMeter.Mark(1)
				} else {
					stateStorageExistMeter.Mark(1)
				}
				return blob, nil
			}
		}
	}
	dirtyStateMissMeter.Mark(1)

	// If the layer is being generated, ensure the requested storage slot
	// has already been covered by the generator.
	key := append(accountHash[:], storageHash[:]...)
	marker := dl.genMarker()
	if marker != nil && bytes.Compare(key, marker) > 0 {
		return nil, errNotCoveredYet
	}
	// Try to retrieve the storage slot from the memory cache
	if dl.states != nil {
		if blob, found := dl.states.HasGet(nil, key); found {
			cleanStateHitMeter.Mark(1)
			cleanStateReadMeter.Mark(int64(len(blob)))

			if len(blob) == 0 {
				stateStorageInexMeter.Mark(1)
			} else {
				stateStorageExistMeter.Mark(1)
			}
			return blob, nil
		}
		cleanStateMissMeter.Mark(1)
	}
	// Try to retrieve the storage slot from the disk
	blob := rawdb.ReadStorageSnapshot(dl.db.diskdb, accountHash, storageHash)

	// Store the resolved data in the clean cache. The background buffer flusher
	// may also write to the clean cache concurrently, but two writers cannot
	// write the same item with different content. If the item already exists,
	// it will be found in the frozen buffer, eliminating the need to check the
	// database.
	if dl.states != nil {
		dl.states.Set(key, blob)
		cleanStateWriteMeter.Mark(int64(len(blob)))
	}
	if len(blob) == 0 {
		stateStorageInexMeter.Mark(1)
		stateStorageInexDiskMeter.Mark(1)
	} else {
		stateStorageExistMeter.Mark(1)
		stateStorageExistDiskMeter.Mark(1)
	}
	return blob, nil
}

// update implements the layer interface, returning a new diff layer on top
// with the given state set.
func (dl *diskLayer) update(root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer {
	return newDiffLayer(dl, root, id, block, nodes, states)
}
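
// A worked example of the history truncation performed by commit below: with
// StateHistory = 90000, a freezer tail of 0 and a bottom layer whose state id
// is 100000, the retention window is exceeded (100000-0 > 90000), so overflow
// is set and the oldest surviving history id becomes:
//
//	oldest := bottom.stateID() - limit + 1 // 100000 - 90000 + 1 = 10001
//
// truncateFromTail is then invoked with oldest-1 = 10000, pruning every
// history object with an id <= 10000.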

// commit merges the given bottom-most diff layer into the node buffer
// and returns a newly constructed disk layer. Note the current disk
// layer must be tagged as stale first to prevent re-access.
func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) {
	dl.lock.Lock()
	defer dl.lock.Unlock()

	// Construct and store the state history first. If a crash happens after
	// storing the state history but before flushing the corresponding states
	// (journal), the stored state history will be truncated from the head on
	// the next restart.
	var (
		overflow bool
		oldest   uint64
	)
	if dl.db.freezer != nil {
		// Bail out with an error if writing the state history fails.
		// This can happen, for example, if the device is full.
		err := writeHistory(dl.db.freezer, bottom)
		if err != nil {
			return nil, err
		}
		// Determine whether the persisted history object has exceeded the
		// configured limit; set overflow to true if so.
		tail, err := dl.db.freezer.Tail()
		if err != nil {
			return nil, err
		}
		limit := dl.db.config.StateHistory
		if limit != 0 && bottom.stateID()-tail > limit {
			overflow = true
			oldest = bottom.stateID() - limit + 1 // track the id of the oldest history **after truncation**
		}
		// Notify the state history indexer of the newly created history
		if dl.db.indexer != nil {
			if err := dl.db.indexer.extend(bottom.stateID()); err != nil {
				return nil, err
			}
		}
	}
	// Mark the diskLayer as stale before applying any mutations on top.
	dl.stale = true

	// Store the root->id lookup afterwards. All stored lookups are identified
	// by the **unique** state root: within the same chain, two non-adjacent
	// blocks can never share the same root.
	if dl.id == 0 {
		rawdb.WriteStateID(dl.db.diskdb, dl.root, 0)
	}
	rawdb.WriteStateID(dl.db.diskdb, bottom.rootHash(), bottom.stateID())

	// In the unique scenario where the id of the oldest history object (after
	// tail truncation) surpasses the persisted state id, forcibly commit the
	// cached dirty states to ensure that the persisted state id remains higher.
	persistedID := rawdb.ReadPersistentStateID(dl.db.diskdb)
	if !force && persistedID < oldest {
		force = true
	}
	// Merge the trie nodes and flat states of the bottom-most diff layer into
	// the buffer as the combined layer.
	combined := dl.buffer.commit(bottom.nodes, bottom.states.stateSet)

	// Terminate the background state snapshot generation before mutating the
	// persistent state.
	if combined.full() || force {
		// Wait until the previous frozen buffer is fully flushed
		if dl.frozen != nil {
			if err := dl.frozen.waitFlush(); err != nil {
				return nil, err
			}
		}
		// Release the frozen buffer; the internally referenced maps will
		// be reclaimed by the GC.
		dl.frozen = nil

		// Terminate the background state snapshot generator before flushing
		// to prevent a data race.
		var (
			progress []byte
			gen      = dl.generator
		)
		if gen != nil {
			gen.stop()
			progress = gen.progressMarker()

			// If the snapshot has been fully generated, unset the generator
			if progress == nil {
				dl.setGenerator(nil)
			} else {
				log.Info("Paused snapshot generation")
			}
		}

		// Freeze the live buffer and schedule background flushing
		dl.frozen = combined
		dl.frozen.flush(bottom.root, dl.db.diskdb, dl.db.freezer, progress, dl.nodes, dl.states, bottom.stateID(), func() {
			// Resume the background generation if it's not completed yet.
			// The generator is assumed to be available if the progress is
			// not nil.
			//
			// Notably, the generator is shared and linked by all the disk
			// layer instances, regardless of whether the generation has
			// been terminated or not.
			if progress != nil {
				gen.run(bottom.root)
			}
		})
		// Block until the frozen buffer is fully flushed out if async flushing
		// is not allowed, or if the oldest history surpasses the persisted state id.
		if dl.db.config.NoAsyncFlush || persistedID < oldest {
			if err := dl.frozen.waitFlush(); err != nil {
				return nil, err
			}
			dl.frozen = nil
		}
		combined = newBuffer(dl.db.config.WriteBufferSize, nil, nil, 0)
	}
	// Link the generator if the snapshot is not yet completed
	ndl := newDiskLayer(bottom.root, bottom.stateID(), dl.db, dl.nodes, dl.states, combined, dl.frozen)
	if dl.generator != nil {
		ndl.setGenerator(dl.generator)
	}
	// To remove outdated history objects from the end, set the 'tail' parameter
	// to 'oldest-1' due to the offset between the freezer index and the history id.
	if overflow {
		pruned, err := truncateFromTail(ndl.db.diskdb, ndl.db.freezer, oldest-1)
		if err != nil {
			return nil, err
		}
		log.Debug("Pruned state history", "items", pruned, "tailid", oldest)
	}
	return ndl, nil
}
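
// The flushing scheme in commit above amounts to double buffering; a rough
// sketch of one full cycle (illustrative, not a literal trace):
//
//	combined := dl.buffer.commit(...) // live buffer absorbs the diff layer
//	// once combined is full (or force is set):
//	dl.frozen.waitFlush()             // drain the previous frozen buffer
//	dl.frozen = combined              // freeze the aggregated writes
//	dl.frozen.flush(...)              // persist them in the background
//	combined = newBuffer(...)         // fresh, empty live buffer takes over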

// revert applies the given state history and returns a reverted disk layer.
func (dl *diskLayer) revert(h *history) (*diskLayer, error) {
	start := time.Now()
	if h.meta.root != dl.rootHash() {
		return nil, errUnexpectedHistory
	}
	if dl.id == 0 {
		return nil, fmt.Errorf("%w: zero state id", errStateUnrecoverable)
	}
	// Apply the reverse state changes on top of the current state. This must
	// be done before taking the lock, in order to access state in "this" layer.
	nodes, err := apply(dl.db, h.meta.parent, h.meta.root, h.meta.version != stateHistoryV0, h.accounts, h.storages)
	if err != nil {
		return nil, err
	}
	// Derive the state modification set from the history, keyed by the hash
	// of the account address and the storage key.
	accounts, storages := h.stateSet()

	// Mark the diskLayer as stale before applying any mutations on top.
	dl.lock.Lock()
	defer dl.lock.Unlock()

	dl.stale = true

	// Unindex the corresponding state history
	if dl.db.indexer != nil {
		if err := dl.db.indexer.shorten(dl.id); err != nil {
			return nil, err
		}
	}
	// The state change may be applied to the node buffer or to the persistent
	// state, depending on whether the node buffer is empty. If the buffer is
	// not empty, the state transition being reverted has not been flushed yet
	// and is still cached there; otherwise, the persistent state is
	// manipulated directly.
	if !dl.buffer.empty() {
		err := dl.buffer.revertTo(dl.db.diskdb, nodes, accounts, storages)
		if err != nil {
			return nil, err
		}
		ndl := newDiskLayer(h.meta.parent, dl.id-1, dl.db, dl.nodes, dl.states, dl.buffer, dl.frozen)

		// Link the generator if it exists
		if dl.generator != nil {
			ndl.setGenerator(dl.generator)
		}
		log.Debug("Reverted data in write buffer", "oldroot", h.meta.root, "newroot", h.meta.parent, "elapsed", common.PrettyDuration(time.Since(start)))
		return ndl, nil
	}
	// Block until the frozen buffer is fully flushed
	if dl.frozen != nil {
		if err := dl.frozen.waitFlush(); err != nil {
			return nil, err
		}
		// Unset the frozen buffer, otherwise the just-reverted states would
		// still be accessible through it after the revert.
		dl.frozen = nil
	}

	// Terminate the generator before writing any data to the database.
	// This must be done after flushing the frozen buffer, as the generator
	// may be restarted at the end of the flush process.
	var progress []byte
	if dl.generator != nil {
		dl.generator.stop()
		progress = dl.generator.progressMarker()
	}
	batch := dl.db.diskdb.NewBatch()
	writeNodes(batch, nodes, dl.nodes)

	// Provide the original values of modified accounts and storages for the revert
	writeStates(batch, progress, accounts, storages, dl.states)
	rawdb.WritePersistentStateID(batch, dl.id-1)
	rawdb.WriteSnapshotRoot(batch, h.meta.parent)
	if err := batch.Write(); err != nil {
		log.Crit("Failed to write states", "err", err)
	}
	// Link the generator and resume generation if the snapshot is not yet
	// fully completed.
	ndl := newDiskLayer(h.meta.parent, dl.id-1, dl.db, dl.nodes, dl.states, dl.buffer, dl.frozen)
	if dl.generator != nil && !dl.generator.completed() {
		ndl.generator = dl.generator
		ndl.generator.run(h.meta.parent)
	}
	log.Debug("Reverted data in persistent state", "oldroot", h.meta.root, "newroot", h.meta.parent, "elapsed", common.PrettyDuration(time.Since(start)))
	return ndl, nil
}
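
// A minimal sketch of rewinding several states by chaining revert. The
// readHistory loader and the tree accessor here are assumed helpers within
// this package, named for illustration only:
//
//	dl := db.tree.bottom() // assumed accessor for the current disk layer
//	for i := 0; i < n && dl.stateID() != 0; i++ {
//		h, err := readHistory(db.freezer, dl.stateID()) // assumed loader
//		if err != nil {
//			break
//		}
//		if dl, err = dl.revert(h); err != nil {
//			break
//		}
//	}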

// size returns the approximate size of cached nodes in the disk layer.
func (dl *diskLayer) size() common.StorageSize {
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	if dl.stale {
		return 0
	}
	return common.StorageSize(dl.buffer.size())
}

// resetCache releases the memory held by the clean cache to prevent memory leaks.
func (dl *diskLayer) resetCache() {
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	// A stale disk layer has lost ownership of the clean caches.
	if dl.stale {
		return
	}
	if dl.nodes != nil {
		dl.nodes.Reset()
	}
	if dl.states != nil {
		dl.states.Reset()
	}
}

// genMarker returns the current state snapshot generation progress marker. If
// the state snapshot has already been fully generated, nil is returned.
func (dl *diskLayer) genMarker() []byte {
	if dl.generator == nil {
		return nil
	}
	return dl.generator.progressMarker()
}

// genComplete returns a flag indicating whether the state snapshot has been
// fully generated.
func (dl *diskLayer) genComplete() bool {
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	return dl.genMarker() == nil
}

// waitFlush blocks until the background buffer flush is completed.
func (dl *diskLayer) waitFlush() error {
	dl.lock.RLock()
	defer dl.lock.RUnlock()

	if dl.frozen == nil {
		return nil
	}
	return dl.frozen.waitFlush()
}

// terminate releases the frozen buffer if it's not nil and terminates the
// background state generator.
func (dl *diskLayer) terminate() error {
	dl.lock.Lock()
	defer dl.lock.Unlock()

	if dl.frozen != nil {
		if err := dl.frozen.waitFlush(); err != nil {
			return err
		}
		dl.frozen = nil
	}
	if dl.generator != nil {
		dl.generator.stop()
	}
	return nil
}
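
// Note the ordering in terminate: the frozen buffer is drained before the
// generator is stopped, consistent with revert above, where the generator may
// be restarted by the flush completion callback (see commit); stopping it
// first could race with that resumption. A typical shutdown sketch:
//
//	if err := dl.terminate(); err != nil {
//		log.Error("Failed to terminate disk layer", "err", err)
//	}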