// Copyright 2022 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package pathdb

import (
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/params"
	"github.com/ethereum/go-ethereum/trie/trienode"
	"github.com/ethereum/go-ethereum/trie/triestate"
)

const (
	// defaultCleanSize is the default memory allowance of clean cache.
	defaultCleanSize = 16 * 1024 * 1024

	// maxBufferSize is the maximum memory allowance of node buffer.
	// Too large a node buffer will cause the system to pause for a long
	// time when a write happens. Also, the largest batch that pebble can
	// support is 4GB, node will panic if batch size exceeds this limit.
	maxBufferSize = 256 * 1024 * 1024

	// DefaultBufferSize is the default memory allowance of node buffer
	// that aggregates the writes from above until it's flushed into the
	// disk. It's meant to be used once the initial sync is finished.
	// Do not increase the buffer size arbitrarily, otherwise the system
	// pause time will increase when the database writes happen.
	DefaultBufferSize = 64 * 1024 * 1024
)

var (
	// maxDiffLayers is the maximum diff layers allowed in the layer tree.
	maxDiffLayers = 128
)

// layer is the interface implemented by all state layers which includes some
// public methods and some additional methods for internal usage.
type layer interface {
	// node retrieves the trie node with the node info. An error will be returned
	// if the read operation exits abnormally. Specifically, if the layer is
	// already stale.
	//
	// Note, no error will be returned if the requested node is not found in database.
	node(owner common.Hash, path []byte, depth int) ([]byte, common.Hash, *nodeLoc, error)

	// rootHash returns the root hash for which this layer was made.
	rootHash() common.Hash

	// stateID returns the associated state id of layer.
	stateID() uint64

	// parentLayer returns the subsequent layer of it, or nil if the disk was reached.
	parentLayer() layer

	// update creates a new layer on top of the existing layer diff tree with
	// the provided dirty trie nodes along with the state change set.
	//
	// Note, the maps are retained by the method to avoid copying everything.
	update(root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer

	// journal commits an entire diff hierarchy to disk into a single journal entry.
	// This is meant to be used during shutdown to persist the layer without
	// flattening everything down (bad for reorgs).
	journal(w io.Writer) error
}

// Config contains the settings for database.
type Config struct {
	StateHistory   uint64 // Number of recent blocks to maintain state history for
	CleanCacheSize int    // Maximum memory allowance (in bytes) for caching clean nodes
	DirtyCacheSize int    // Maximum memory allowance (in bytes) for caching dirty nodes
	ReadOnly       bool   // Flag whether the database is opened in read only mode.
}

// sanitize checks the provided user configurations and changes anything that's
// unreasonable or unworkable.
//
// The receiver is left untouched: the adjustments are applied to a copy and a
// pointer to that copy is returned. Currently the only adjustment is capping
// DirtyCacheSize at maxBufferSize.
func (c *Config) sanitize() *Config {
	conf := *c
	if conf.DirtyCacheSize > maxBufferSize {
		log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.DirtyCacheSize), "updated", common.StorageSize(maxBufferSize))
		conf.DirtyCacheSize = maxBufferSize
	}
	return &conf
}

// Defaults contains default settings for Ethereum mainnet.
var Defaults = &Config{
	StateHistory:   params.FullImmutabilityThreshold,
	CleanCacheSize: defaultCleanSize,
	DirtyCacheSize: DefaultBufferSize,
}

// ReadOnly is the config in order to open database in read only mode.
var ReadOnly = &Config{ReadOnly: true}

// Database is a multiple-layered structure for maintaining in-memory trie nodes.
// It consists of one persistent base layer backed by a key-value store, on top
// of which arbitrarily many in-memory diff layers are stacked. The memory diffs
// can form a tree with branching, but the disk layer is singleton and common to
// all. If a reorg goes deeper than the disk layer, a batch of reverse diffs can
// be applied to rollback. The deepest reorg that can be handled depends on the
// amount of state histories tracked in the disk.
//
// At most one readable and writable database can be opened at the same time in
// the whole system which ensures that only one database writer can operate disk
// state. Unexpected open operations can cause the system to panic.
type Database struct {
	// readOnly is the flag whether the mutation is allowed to be applied.
	// It will be set automatically when the database is journaled during
	// the shutdown to reject all following unexpected mutations.
	readOnly   bool                         // Flag if database is opened in read only mode
	waitSync   bool                         // Flag if database is deactivated due to initial state sync
	isVerkle   bool                         // Flag if database is used for verkle tree
	bufferSize int                          // Memory allowance (in bytes) for caching dirty nodes
	config     *Config                      // Configuration for database
	diskdb     ethdb.Database               // Persistent storage for matured trie nodes
	tree       *layerTree                   // The group for all known layers
	freezer    ethdb.ResettableAncientStore // Freezer for storing trie histories, nil possible in tests
	lock       sync.RWMutex                 // Lock to prevent mutations from happening at the same time
}

// New attempts to load an already existing layer from a persistent key-value
// store (with a number of memory layers from a journal). If the journal is not
// matched with the base persistent layer, all the recorded diff layers are discarded.
//
// A nil config selects Defaults. The config is sanitized before use, so the
// caller's Config value is never modified. Failures while repairing the state
// history or while disabling the database are reported through log.Crit.
func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database {
	if config == nil {
		config = Defaults
	}
	config = config.sanitize()

	db := &Database{
		readOnly:   config.ReadOnly,
		isVerkle:   isVerkle,
		bufferSize: config.DirtyCacheSize,
		config:     config,
		diskdb:     diskdb,
	}
	// Construct the layer tree by resolving the in-disk singleton state
	// and in-memory layer journal.
	db.tree = newLayerTree(db.loadLayers())

	// Repair the state history, which might not be aligned with the state
	// in the key-value store due to an unclean shutdown.
	if err := db.repairHistory(); err != nil {
		log.Crit("Failed to repair pathdb", "err", err)
	}
	// Disable database in case node is still in the initial state sync stage.
	// Read-only databases are skipped since Disable rejects them.
	if rawdb.ReadSnapSyncStatusFlag(diskdb) == rawdb.StateSyncRunning && !db.readOnly {
		if err := db.Disable(); err != nil {
			log.Crit("Failed to disable database", "err", err) // impossible to happen
		}
	}
	return db
}

// repairHistory truncates leftover state history objects, which may occur due
// to an unclean shutdown or other unexpected reasons.
//
// Note, every return path of this function currently yields nil: a missing
// ancient directory is tolerated silently (db.freezer stays nil in that case),
// and all other failures are reported through log.Crit rather than returned.
func (db *Database) repairHistory() error {
	// Open the freezer for state history. This mechanism ensures that
	// only one database instance can be opened at a time to prevent
	// accidental mutation.
	ancient, err := db.diskdb.AncientDatadir()
	if err != nil {
		// TODO error out if ancient store is disabled. Tons of unit tests
		// disable the ancient store thus the error here will immediately fail
		// all of them. Fix the tests first.
		return nil
	}
	freezer, err := rawdb.NewStateFreezer(ancient, false)
	if err != nil {
		log.Crit("Failed to open state history freezer", "err", err)
	}
	db.freezer = freezer

	// Reset the entire state histories if the trie database is not initialized
	// yet. This action is necessary because these state histories are not
	// expected to exist without an initialized trie database.
	id := db.tree.bottom().stateID()
	if id == 0 {
		frozen, err := db.freezer.Ancients()
		if err != nil {
			log.Crit("Failed to retrieve head of state history", "err", err)
		}
		if frozen != 0 {
			err := db.freezer.Reset()
			if err != nil {
				log.Crit("Failed to reset state histories", "err", err)
			}
			log.Info("Truncated extraneous state history")
		}
		return nil
	}
	// Truncate the extra state histories above in freezer in case it's not
	// aligned with the disk layer. It might happen after an unclean shutdown.
	pruned, err := truncateFromHead(db.diskdb, db.freezer, id)
	if err != nil {
		log.Crit("Failed to truncate extra state histories", "err", err)
	}
	if pruned != 0 {
		log.Warn("Truncated extra state histories", "number", pruned)
	}
	return nil
}

// Update adds a new layer into the tree, if that can be linked to an existing
// old parent. It is disallowed to insert a disk layer (the origin of all). Apart
// from that this function will flatten the extra diff layers at bottom into disk
// to only keep 128 diff layers in memory by default.
//
// The passed in maps(nodes, states) will be retained to avoid copying everything.
// Therefore, these maps must not be changed afterwards.
//
// An error is returned if mutation is currently not allowed (read-only mode or
// waiting for state sync), or if adding or capping the layer fails.
func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error {
	// Hold the lock to prevent concurrent mutations.
	db.lock.Lock()
	defer db.lock.Unlock()

	// Short circuit if the mutation is not allowed.
	if err := db.modifyAllowed(); err != nil {
		return err
	}
	if err := db.tree.add(root, parentRoot, block, nodes, states); err != nil {
		return err
	}
	// Keep 128 diff layers in the memory, persistent layer is 129th.
	// - head layer is paired with HEAD state
	// - head-1 layer is paired with HEAD-1 state
	// - head-127 layer(bottom-most diff layer) is paired with HEAD-127 state
	// - head-128 layer(disk layer) is paired with HEAD-128 state
	return db.tree.cap(root, maxDiffLayers)
}

// Commit traverses downwards the layer tree from a specified layer with the
// provided state root and all the layers below are flattened downwards. It
// can be used alone and mostly for test purposes.
//
// Note, the report parameter is not used by this implementation.
func (db *Database) Commit(root common.Hash, report bool) error {
	// Hold the lock to prevent concurrent mutations.
	db.lock.Lock()
	defer db.lock.Unlock()

	// Short circuit if the mutation is not allowed.
	if err := db.modifyAllowed(); err != nil {
		return err
	}
	// Capping with zero retained layers flattens everything below root.
	return db.tree.cap(root, 0)
}

// Disable deactivates the database and invalidates all available state layers
// as stale to prevent access to the persistent state, which is in the syncing
// stage.
//
// It returns errDatabaseReadOnly for a read-only database. Calling it on an
// already disabled database only logs an error and returns nil.
func (db *Database) Disable() error {
	db.lock.Lock()
	defer db.lock.Unlock()

	// Short circuit if the database is in read only mode.
	if db.readOnly {
		return errDatabaseReadOnly
	}
	// Prevent duplicated disable operation.
	if db.waitSync {
		log.Error("Reject duplicated disable operation")
		return nil
	}
	db.waitSync = true

	// Mark the disk layer as stale to prevent access to persistent state.
	db.tree.bottom().markStale()

	// Write the initial sync flag to persist it across restarts.
	rawdb.WriteSnapSyncStatusFlag(db.diskdb, rawdb.StateSyncRunning)
	log.Info("Disabled trie database due to state sync")
	return nil
}

// Enable activates database and resets the state tree with the provided persistent
// state root once the state sync is finished.
//
// It returns errDatabaseReadOnly for a read-only database, and an error if the
// provided root does not match the hash of the account trie root node stored
// in the key-value store (types.EmptyRootHash is assumed if none is stored).
func (db *Database) Enable(root common.Hash) error {
	db.lock.Lock()
	defer db.lock.Unlock()

	// Short circuit if the database is in read only mode.
	if db.readOnly {
		return errDatabaseReadOnly
	}
	// Ensure the provided state root matches the stored one.
	root = types.TrieRootHash(root)
	stored := types.EmptyRootHash
	if blob := rawdb.ReadAccountTrieNode(db.diskdb, nil); len(blob) > 0 {
		stored = crypto.Keccak256Hash(blob)
	}
	if stored != root {
		return fmt.Errorf("state root mismatch: stored %x, synced %x", stored, root)
	}
	// Drop the stale state journal in persistent database and
	// reset the persistent state id back to zero.
	batch := db.diskdb.NewBatch()
	rawdb.DeleteTrieJournal(batch)
	rawdb.WritePersistentStateID(batch, 0)
	if err := batch.Write(); err != nil {
		return err
	}
	// Clean up all state histories in freezer. Theoretically
	// all root->id mappings should be removed as well. Since
	// mappings can be huge and might take a while to clear
	// them, just leave them in disk and wait for overwriting.
	if db.freezer != nil {
		if err := db.freezer.Reset(); err != nil {
			return err
		}
	}
	// Re-construct a new disk layer backed by persistent state
	// with **empty clean cache and node buffer**.
	db.tree.reset(newDiskLayer(root, 0, db, nil, newNodeBuffer(db.bufferSize, nil, 0)))

	// Re-enable the database as the final step.
	db.waitSync = false
	rawdb.WriteSnapSyncStatusFlag(db.diskdb, rawdb.StateSyncFinished)
	log.Info("Rebuilt trie database", "root", root)
	return nil
}

// Recover rolls back the database to a specified historical point.
// The state is supported as the rollback destination only if it's
// canonical state and the corresponding trie histories are existent.
//
// It returns an error if mutation is not allowed, if no freezer is attached,
// errStateUnrecoverable if Recoverable rejects the target root, or any error
// hit while reading or reverting a state history.
func (db *Database) Recover(root common.Hash, loader triestate.TrieLoader) error {
	db.lock.Lock()
	defer db.lock.Unlock()

	// Short circuit if rollback operation is not supported.
	if err := db.modifyAllowed(); err != nil {
		return err
	}
	if db.freezer == nil {
		return errors.New("state rollback is non-supported")
	}
	// Short circuit if the target state is not recoverable.
	root = types.TrieRootHash(root)
	if !db.Recoverable(root) {
		return errStateUnrecoverable
	}
	// Apply the state histories upon the disk layer in order.
	var (
		start = time.Now()
		dl    = db.tree.bottom()
	)
	for dl.rootHash() != root {
		h, err := readHistory(db.freezer, dl.stateID())
		if err != nil {
			return err
		}
		dl, err = dl.revert(h, loader)
		if err != nil {
			return err
		}
		// reset layer with newly created disk layer. It must be
		// done after each revert operation, otherwise the new
		// disk layer won't be accessible from outside.
		db.tree.reset(dl)
	}
	// The journal no longer matches the reverted disk layer, drop it along
	// with the state histories above the new disk layer.
	rawdb.DeleteTrieJournal(db.diskdb)
	_, err := truncateFromHead(db.diskdb, db.freezer, dl.stateID())
	if err != nil {
		return err
	}
	log.Debug("Recovered state", "root", root, "elapsed", common.PrettyDuration(time.Since(start)))
	return nil
}

// Recoverable returns the indicator if the specified state is recoverable.
//
// Note, this method does not acquire db.lock itself; Recover invokes it while
// already holding the lock.
func (db *Database) Recoverable(root common.Hash) bool {
	// Ensure the requested state is a known state.
	root = types.TrieRootHash(root)
	id := rawdb.ReadStateID(db.diskdb, root)
	if id == nil {
		return false
	}
	// Recoverable state must be below the disk layer. The recoverable
	// state only refers to the state that is currently not available,
	// but can be restored by applying state history.
	dl := db.tree.bottom()
	if *id >= dl.stateID() {
		return false
	}
	// This is a temporary workaround for the unavailability of the freezer in
	// dev mode. As a consequence, the Pathdb loses the ability for deep reorg
	// in certain cases.
	// TODO(rjl493456442): Implement the in-memory ancient store.
	if db.freezer == nil {
		return false
	}
	// Ensure the requested state is a canonical state and all state
	// histories in range [id+1, disklayer.ID] are present and complete.
	// The callback chains the histories together: each history's parent must
	// equal the root reached so far, which then advances to that history's root.
	return checkHistories(db.freezer, *id+1, dl.stateID()-*id, func(m *meta) error {
		if m.parent != root {
			return errors.New("unexpected state history")
		}
		root = m.root
		return nil
	}) == nil
}

// Close closes the trie database and the held freezer.
//
// The database is switched to read-only mode first, so mutations guarded by
// modifyAllowed are rejected afterwards. If no freezer is attached, nil is
// returned.
func (db *Database) Close() error {
	db.lock.Lock()
	defer db.lock.Unlock()

	// Set the database to read-only mode to prevent all
	// following mutations.
	db.readOnly = true

	// Release the memory held by clean cache.
	db.tree.bottom().resetCache()

	// Close the attached state history freezer.
	if db.freezer == nil {
		return nil
	}
	return db.freezer.Close()
}

// Size returns the current storage size of the memory cache in front of the
// persistent database layer.
//
// diffs is the sum of the memory field of every diff layer in the tree, nodes
// is the sum of size() of every disk layer visited.
func (db *Database) Size() (diffs common.StorageSize, nodes common.StorageSize) {
	db.tree.forEach(func(layer layer) {
		if diff, ok := layer.(*diffLayer); ok {
			diffs += common.StorageSize(diff.memory)
		}
		if disk, ok := layer.(*diskLayer); ok {
			nodes += disk.size()
		}
	})
	return diffs, nodes
}

// Initialized returns an indicator if the state data is already
// initialized in path-based scheme.
//
// The database counts as initialized if any layer in the tree has a root other
// than types.EmptyRootHash, or, failing that, if the persisted snap sync status
// flag differs from rawdb.StateSyncUnknown. Note, the genesisRoot parameter is
// not consulted by this implementation.
func (db *Database) Initialized(genesisRoot common.Hash) bool {
	var inited bool
	db.tree.forEach(func(layer layer) {
		if layer.rootHash() != types.EmptyRootHash {
			inited = true
		}
	})
	if !inited {
		inited = rawdb.ReadSnapSyncStatusFlag(db.diskdb) != rawdb.StateSyncUnknown
	}
	return inited
}

// SetBufferSize sets the node buffer size to the provided value(in bytes).
474 func (db *Database) SetBufferSize(size int) error { 475 db.lock.Lock() 476 defer db.lock.Unlock() 477 478 if size > maxBufferSize { 479 log.Info("Capped node buffer size", "provided", common.StorageSize(size), "adjusted", common.StorageSize(maxBufferSize)) 480 size = maxBufferSize 481 } 482 db.bufferSize = size 483 return db.tree.bottom().setBufferSize(db.bufferSize) 484 } 485 486 // modifyAllowed returns the indicator if mutation is allowed. This function 487 // assumes the db.lock is already held. 488 func (db *Database) modifyAllowed() error { 489 if db.readOnly { 490 return errDatabaseReadOnly 491 } 492 if db.waitSync { 493 return errDatabaseWaitSync 494 } 495 return nil 496 } 497 498 // AccountHistory inspects the account history within the specified range. 499 // 500 // Start: State ID of the first history object for the query. 0 implies the first 501 // available object is selected as the starting point. 502 // 503 // End: State ID of the last history for the query. 0 implies the last available 504 // object is selected as the ending point. Note end is included in the query. 505 func (db *Database) AccountHistory(address common.Address, start, end uint64) (*HistoryStats, error) { 506 return accountHistory(db.freezer, address, start, end) 507 } 508 509 // StorageHistory inspects the storage history within the specified range. 510 // 511 // Start: State ID of the first history object for the query. 0 implies the first 512 // available object is selected as the starting point. 513 // 514 // End: State ID of the last history for the query. 0 implies the last available 515 // object is selected as the ending point. Note end is included in the query. 516 // 517 // Note, slot refers to the hash of the raw slot key. 
518 func (db *Database) StorageHistory(address common.Address, slot common.Hash, start uint64, end uint64) (*HistoryStats, error) { 519 return storageHistory(db.freezer, address, slot, start, end) 520 } 521 522 // HistoryRange returns the block numbers associated with earliest and latest 523 // state history in the local store. 524 func (db *Database) HistoryRange() (uint64, uint64, error) { 525 return historyRange(db.freezer) 526 }