github.com/MetalBlockchain/subnet-evm@v0.4.9/trie/database.go

// (c) 2020-2022, Ava Labs, Inc.
//
// This file is a derived work, based on the go-ethereum library whose original
// notices appear below.
//
// It is distributed under a license compatible with the licensing terms of the
// original code from which it is derived.
//
// Much love to the original authors for their work.
// **********
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package trie

import (
	"errors"
	"fmt"
	"io"
	"reflect"
	"sync"
	"time"

	"github.com/MetalBlockchain/subnet-evm/core/rawdb"
	"github.com/MetalBlockchain/subnet-evm/core/types"
	"github.com/MetalBlockchain/subnet-evm/ethdb"
	"github.com/MetalBlockchain/subnet-evm/metrics"
	"github.com/MetalBlockchain/subnet-evm/utils"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/rlp"
)

const (
	cacheStatsUpdateFrequency = 1000 // update trie cache stats once per 1000 ops
)

var (
	memcacheCleanHitMeter   = metrics.NewRegisteredMeter("trie/memcache/clean/hit", nil)
	memcacheCleanMissMeter  = metrics.NewRegisteredMeter("trie/memcache/clean/miss", nil)
	memcacheCleanReadMeter  = metrics.NewRegisteredMeter("trie/memcache/clean/read", nil)
	memcacheCleanWriteMeter = metrics.NewRegisteredMeter("trie/memcache/clean/write", nil)

	memcacheDirtyHitMeter       = metrics.NewRegisteredMeter("trie/memcache/dirty/hit", nil)
	memcacheDirtyMissMeter      = metrics.NewRegisteredMeter("trie/memcache/dirty/miss", nil)
	memcacheDirtyReadMeter      = metrics.NewRegisteredMeter("trie/memcache/dirty/read", nil)
	memcacheDirtyWriteMeter     = metrics.NewRegisteredMeter("trie/memcache/dirty/write", nil)
	memcacheDirtySizeGauge      = metrics.NewRegisteredGaugeFloat64("trie/memcache/dirty/size", nil)
	memcacheDirtyChildSizeGauge = metrics.NewRegisteredGaugeFloat64("trie/memcache/dirty/childsize", nil)
	memcacheDirtyNodesGauge     = metrics.NewRegisteredGauge("trie/memcache/dirty/nodes", nil)

	memcacheFlushMeter         = metrics.NewRegisteredMeter("trie/memcache/flush/count", nil)
	memcacheFlushTimeTimer     = metrics.NewRegisteredResettingTimer("trie/memcache/flush/time", nil)
	memcacheFlushLockTimeTimer = metrics.NewRegisteredResettingTimer("trie/memcache/flush/locktime", nil)
	memcacheFlushNodesMeter    = metrics.NewRegisteredMeter("trie/memcache/flush/nodes", nil)
	memcacheFlushSizeMeter     = metrics.NewRegisteredMeter("trie/memcache/flush/size", nil)

	memcacheGCTimeTimer  = metrics.NewRegisteredResettingTimer("trie/memcache/gc/time", nil)
	memcacheGCNodesMeter = metrics.NewRegisteredMeter("trie/memcache/gc/nodes", nil)
	memcacheGCSizeMeter  = metrics.NewRegisteredMeter("trie/memcache/gc/size", nil)

	memcacheCommitMeter         = metrics.NewRegisteredMeter("trie/memcache/commit/count", nil)
	memcacheCommitTimeTimer     = metrics.NewRegisteredResettingTimer("trie/memcache/commit/time", nil)
	memcacheCommitLockTimeTimer = metrics.NewRegisteredResettingTimer("trie/memcache/commit/locktime", nil)
	memcacheCommitNodesMeter    = metrics.NewRegisteredMeter("trie/memcache/commit/nodes", nil)
	memcacheCommitSizeMeter     = metrics.NewRegisteredMeter("trie/memcache/commit/size", nil)
)

// Database is an intermediate write layer between the trie data structures and
// the disk database. The aim is to accumulate trie writes in-memory and only
// periodically flush a couple of tries to disk, garbage collecting the remainder.
//
// The trie Database is thread-safe in its mutations and in providing individual,
// independent node access.
type Database struct {
	diskdb ethdb.KeyValueStore // Persistent storage for matured trie nodes

	cleans  *utils.MeteredCache         // GC friendly memory cache of clean node RLPs
	dirties map[common.Hash]*cachedNode // Data and references relationships of dirty trie nodes
	oldest  common.Hash                 // Oldest tracked node, flush-list head
	newest  common.Hash                 // Newest tracked node, flush-list tail

	gctime  time.Duration      // Time spent on garbage collection since last commit
	gcnodes uint64             // Nodes garbage collected since last commit
	gcsize  common.StorageSize // Data storage garbage collected since last commit

	flushtime  time.Duration      // Time spent on data flushing since last commit
	flushnodes uint64             // Nodes flushed since last commit
	flushsize  common.StorageSize // Data storage flushed since last commit

	dirtiesSize  common.StorageSize // Storage size of the dirty node cache (exc. metadata)
	childrenSize common.StorageSize // Storage size of the external children tracking
	preimages    *preimageStore     // The store for caching preimages

	lock sync.RWMutex
}
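// A minimal lifecycle sketch of this write layer (illustrative only, not code
// from this file): callers accumulate trie writes via Update, anchor live
// roots with Reference, and later either garbage collect stale roots with
// Dereference or persist a root with Commit:
//
//	db := NewDatabaseWithConfig(diskdb, &Config{Cache: 256})
//	// ... trie commits populate db via Update(...) ...
//	db.Reference(root, common.Hash{}) // keep root alive under the metaroot
//	db.Dereference(oldRoot)           // GC a stale root and its subtree
//	db.Commit(root, true, nil)        // persist root and all its children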
// rawNode is a simple binary blob used to differentiate between collapsed trie
// nodes and already encoded RLP binary blobs (while at the same time storing
// them in the same cache fields).
type rawNode []byte

func (n rawNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") }

func (n rawNode) EncodeRLP(w io.Writer) error {
	_, err := w.Write(n)
	return err
}

// rawFullNode represents only the useful data content of a full node, with the
// caches and flags stripped out to minimize its data storage. This type honors
// the same RLP encoding as the original parent.
type rawFullNode [17]node

func (n rawFullNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawFullNode) fstring(ind string) string { panic("this should never end up in a live trie") }

func (n rawFullNode) EncodeRLP(w io.Writer) error {
	eb := rlp.NewEncoderBuffer(w)
	n.encode(eb)
	return eb.Flush()
}

// rawShortNode represents only the useful data content of a short node, with the
// caches and flags stripped out to minimize its data storage. This type honors
// the same RLP encoding as the original parent.
type rawShortNode struct {
	Key []byte
	Val node
}

func (n rawShortNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawShortNode) fstring(ind string) string { panic("this should never end up in a live trie") }

// cachedNode is all the information we know about a single cached trie node
// in the memory database write layer.
type cachedNode struct {
	node node   // Cached collapsed trie node, or raw rlp data
	size uint16 // Byte size of the useful cached data

	parents  uint32                 // Number of live nodes referencing this one
	children map[common.Hash]uint16 // External children referenced by this node

	flushPrev common.Hash // Previous node in the flush-list
	flushNext common.Hash // Next node in the flush-list
}

// cachedNodeSize is the raw size of a cachedNode data structure without any
// node data included. It's an approximate size, but should be a lot better
// than not counting them.
var cachedNodeSize = int(reflect.TypeOf(cachedNode{}).Size())

// cachedNodeChildrenSize is the raw size of an initialized but empty external
// reference map.
const cachedNodeChildrenSize = 48

// rlp returns the raw rlp encoded blob of the cached trie node, either directly
// from the cache, or by regenerating it from the collapsed node.
func (n *cachedNode) rlp() []byte {
	if node, ok := n.node.(rawNode); ok {
		return node
	}
	return nodeToBytes(n.node)
}

// obj returns the decoded and expanded trie node, either directly from the cache,
// or by regenerating it from the rlp encoded blob.
func (n *cachedNode) obj(hash common.Hash) node {
	if node, ok := n.node.(rawNode); ok {
		// The raw-blob format nodes are loaded either from the
		// clean cache or the database, they are all in their own
		// copy and safe to use unsafe decoder.
		return mustDecodeNodeUnsafe(hash[:], node)
	}
	return expandNode(hash[:], n.node)
}

// forChilds invokes the callback for all the tracked children of this node,
// both the implicit ones from inside the node as well as the explicit ones
// from outside the node.
func (n *cachedNode) forChilds(onChild func(hash common.Hash)) {
	for child := range n.children {
		onChild(child)
	}
	if _, ok := n.node.(rawNode); !ok {
		forGatherChildren(n.node, onChild)
	}
}

// forGatherChildren traverses the node hierarchy of a collapsed storage node and
// invokes the callback for all the hashnode children.
func forGatherChildren(n node, onChild func(hash common.Hash)) {
	switch n := n.(type) {
	case *rawShortNode:
		forGatherChildren(n.Val, onChild)
	case rawFullNode:
		for i := 0; i < 16; i++ {
			forGatherChildren(n[i], onChild)
		}
	case hashNode:
		onChild(common.BytesToHash(n))
	case valueNode, nil, rawNode:
	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}

// simplifyNode traverses the hierarchy of an expanded memory node and discards
// all the internal caches, returning a node that only contains the raw data.
func simplifyNode(n node) node {
	switch n := n.(type) {
	case *shortNode:
		// Short nodes discard the flags and cascade
		return &rawShortNode{Key: n.Key, Val: simplifyNode(n.Val)}

	case *fullNode:
		// Full nodes discard the flags and cascade
		node := rawFullNode(n.Children)
		for i := 0; i < len(node); i++ {
			if node[i] != nil {
				node[i] = simplifyNode(node[i])
			}
		}
		return node

	case valueNode, hashNode, rawNode:
		return n

	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}

// expandNode traverses the node hierarchy of a collapsed storage node and converts
// all fields and keys into expanded memory form.
func expandNode(hash hashNode, n node) node {
	switch n := n.(type) {
	case *rawShortNode:
		// Short nodes need key and child expansion
		return &shortNode{
			Key: compactToHex(n.Key),
			Val: expandNode(nil, n.Val),
			flags: nodeFlag{
				hash: hash,
			},
		}

	case rawFullNode:
		// Full nodes need child expansion
		node := &fullNode{
			flags: nodeFlag{
				hash: hash,
			},
		}
		for i := 0; i < len(node.Children); i++ {
			if n[i] != nil {
				node.Children[i] = expandNode(nil, n[i])
			}
		}
		return node

	case valueNode, hashNode:
		return n

	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}

// Config defines all necessary options for the database.
type Config struct {
	Cache       int    // Memory allowance (MB) to use for caching trie nodes in memory
	Preimages   bool   // Flag whether the preimage of a trie key is recorded
	Journal     string // File location to load the trie clean cache from
	StatsPrefix string // Prefix for cache stats (disabled if empty)
}

// NewDatabase creates a new trie database to store ephemeral trie content before
// it's written out to disk or garbage collected. No read cache is created, so all
// data retrievals will hit the underlying disk database.
func NewDatabase(diskdb ethdb.KeyValueStore) *Database {
	return NewDatabaseWithConfig(diskdb, nil)
}

// NewDatabaseWithConfig creates a new trie database to store ephemeral trie content
// before it's written out to disk or garbage collected. It also acts as a read cache
// for nodes loaded from disk.
func NewDatabaseWithConfig(diskdb ethdb.KeyValueStore, config *Config) *Database {
	var cleans *utils.MeteredCache
	if config != nil && config.Cache > 0 {
		cleans = utils.NewMeteredCache(config.Cache*1024*1024, config.Journal, config.StatsPrefix, cacheStatsUpdateFrequency)
	}
	var preimage *preimageStore
	if config != nil && config.Preimages {
		preimage = newPreimageStore(diskdb)
	}
	db := &Database{
		diskdb: diskdb,
		cleans: cleans,
		// Seed the dirty cache with the metaroot entry (the empty hash),
		// which anchors all in-memory trie roots via reference counting.
		dirties: map[common.Hash]*cachedNode{{}: {
			children: make(map[common.Hash]uint16),
		}},
		preimages: preimage,
	}
	return db
}

// DiskDB retrieves the persistent storage backing the trie database.
func (db *Database) DiskDB() ethdb.KeyValueStore {
	return db.diskdb
}
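// Illustrative construction (a hedged sketch; rawdb.NewMemoryDatabase is
// assumed to be available in this fork's core/rawdb, as in upstream
// go-ethereum):
//
//	memdb := rawdb.NewMemoryDatabase()
//	triedb := NewDatabaseWithConfig(memdb, &Config{
//		Cache:     64,   // 64 MB clean cache
//		Preimages: true, // record key preimages
//	})
//	_ = triedb.DiskDB() // the backing key-value store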
// insert inserts a simplified trie node into the memory database.
// All nodes inserted by this function will be reference tracked
// and in theory should only be used for **trie nodes** insertion.
func (db *Database) insert(hash common.Hash, size int, node node) {
	// If the node's already cached, skip
	if _, ok := db.dirties[hash]; ok {
		return
	}
	memcacheDirtyWriteMeter.Mark(int64(size))

	// Create the cached entry for this node
	entry := &cachedNode{
		node:      node,
		size:      uint16(size),
		flushPrev: db.newest,
	}
	entry.forChilds(func(child common.Hash) {
		if c := db.dirties[child]; c != nil {
			c.parents++
		}
	})
	db.dirties[hash] = entry

	// Update the flush-list endpoints
	if db.oldest == (common.Hash{}) {
		db.oldest, db.newest = hash, hash
	} else {
		db.dirties[db.newest].flushNext, db.newest = hash, hash
	}
	db.dirtiesSize += common.StorageSize(common.HashLength + entry.size)
}

// RawNode retrieves an encoded cached trie node from memory. If it cannot be found
// cached, the method queries the persistent database for the content. This function
// will not return the metaroot.
func (db *Database) RawNode(h common.Hash) ([]byte, error) {
	if h == (common.Hash{}) {
		return nil, errors.New("not found")
	}
	enc, cn, err := db.node(h)
	if err != nil {
		return nil, err
	}
	if len(enc) > 0 {
		return enc, nil
	}
	return cn.rlp(), nil
}

// EncodedNode returns a formatted [node] when given a node hash. If no node
// exists, nil is returned. This function will return the metaroot.
func (db *Database) EncodedNode(h common.Hash) node {
	enc, cn, err := db.node(h)
	if err != nil {
		return nil
	}
	if len(enc) > 0 {
		return mustDecodeNode(h[:], enc)
	}
	return cn.obj(h)
}

// node retrieves an encoded cached trie node from memory. If it cannot be found
// cached, the method queries the persistent database for the content.
//
// We do not return a single node representation to avoid useless
// encoding/decoding depending on the caller.
func (db *Database) node(hash common.Hash) ([]byte, *cachedNode, error) {
	// Retrieve the node from the clean cache if available
	if db.cleans != nil {
		k := hash[:]
		enc, found := db.cleans.HasGet(nil, k)
		if found {
			if len(enc) > 0 {
				memcacheCleanHitMeter.Mark(1)
				memcacheCleanReadMeter.Mark(int64(len(enc)))
				return enc, nil, nil
			} else {
				// Delete anything from cache that may have been added incorrectly
				//
				// This will prevent a panic as callers of this function assume the raw
				// or cached node is populated.
				log.Debug("removing empty value found in cleans cache", "k", k)
				db.cleans.Del(k)
			}
		}
	}
	// Retrieve the node from the dirty cache if available
	db.lock.RLock()
	dirty := db.dirties[hash]
	db.lock.RUnlock()

	if dirty != nil {
		memcacheDirtyHitMeter.Mark(1)
		memcacheDirtyReadMeter.Mark(int64(dirty.size))
		return nil, dirty, nil
	}
	memcacheDirtyMissMeter.Mark(1)

	// Content unavailable in memory, attempt to retrieve from disk
	enc := rawdb.ReadTrieNode(db.diskdb, hash)
	if len(enc) > 0 {
		if db.cleans != nil {
			db.cleans.Set(hash[:], enc)
			memcacheCleanMissMeter.Mark(1)
			memcacheCleanWriteMeter.Mark(int64(len(enc)))
		}
		return enc, nil, nil
	}
	return nil, nil, errors.New("not found")
}
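// Note on the read path above: lookups fall through three tiers in order,
// the clean cache (encoded RLP), then the dirty cache (live cachedNode,
// guarded by db.lock), then disk; a disk hit back-fills the clean cache so
// the next read of the same node is served from memory.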
// Nodes retrieves the hashes of all the nodes cached within the memory database.
// This method is extremely expensive and should only be used to validate internal
// states in test code.
func (db *Database) Nodes() []common.Hash {
	db.lock.RLock()
	defer db.lock.RUnlock()

	var hashes = make([]common.Hash, 0, len(db.dirties))
	for hash := range db.dirties {
		if hash != (common.Hash{}) { // Special case for "root" references/nodes
			hashes = append(hashes, hash)
		}
	}
	return hashes
}

// Reference adds a new reference from a parent node to a child node. This
// function is used to add a reference between an internal trie node and an
// external node (e.g. a storage trie root); all internal trie nodes are
// referenced together by the database itself.
func (db *Database) Reference(child common.Hash, parent common.Hash) {
	db.lock.Lock()
	defer db.lock.Unlock()

	db.reference(child, parent)
}

func (db *Database) reference(child common.Hash, parent common.Hash) {
	// If the node does not exist, it's a node pulled from disk, skip
	node, ok := db.dirties[child]
	if !ok {
		return
	}
	// If the reference already exists, only duplicate for roots
	if db.dirties[parent].children == nil {
		db.dirties[parent].children = make(map[common.Hash]uint16)
		db.childrenSize += cachedNodeChildrenSize
	} else if _, ok = db.dirties[parent].children[child]; ok && parent != (common.Hash{}) {
		return
	}
	node.parents++
	db.dirties[parent].children[child]++
	if db.dirties[parent].children[child] == 1 {
		db.childrenSize += common.HashLength + 2 // uint16 counter
	}
}

// Dereference removes an existing reference from a root node.
func (db *Database) Dereference(root common.Hash) {
	// Sanity check to ensure that the meta-root is not removed
	if root == (common.Hash{}) {
		log.Error("Attempted to dereference the trie cache meta root")
		return
	}

	db.lock.Lock()
	defer db.lock.Unlock()
	nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
	db.dereference(root, common.Hash{})

	db.gcnodes += uint64(nodes - len(db.dirties))
	db.gcsize += storage - db.dirtiesSize
	db.gctime += time.Since(start)

	memcacheDirtySizeGauge.Update(float64(db.dirtiesSize))
	memcacheDirtyChildSizeGauge.Update(float64(db.childrenSize))
	memcacheDirtyNodesGauge.Update(int64(len(db.dirties)))

	memcacheGCTimeTimer.Update(time.Since(start))
	memcacheGCSizeMeter.Mark(int64(storage - db.dirtiesSize))
	memcacheGCNodesMeter.Mark(int64(nodes - len(db.dirties)))

	log.Debug("Dereferenced trie from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start),
		"gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)
}

// dereference is the private locked version of Dereference.
func (db *Database) dereference(child common.Hash, parent common.Hash) {
	// Dereference the parent-child
	node := db.dirties[parent]

	if node.children != nil && node.children[child] > 0 {
		node.children[child]--
		if node.children[child] == 0 {
			delete(node.children, child)
			db.childrenSize -= (common.HashLength + 2) // uint16 counter
		}
	}
	// If the child does not exist, it's a previously committed node.
	node, ok := db.dirties[child]
	if !ok {
		return
	}
	// If there are no more references to the child, delete it and cascade
	if node.parents > 0 {
		// This is a special cornercase where a node loaded from disk (i.e. not in the
		// memcache any more) gets reinjected as a new node (short node split into full,
		// then reverted into short), causing a cached node to have no parents. That is
		// no problem in itself, but don't make maxint parents out of it.
		node.parents--
	}
	if node.parents == 0 {
		// Remove the node from the flush-list
		switch child {
		case db.oldest:
			db.oldest = node.flushNext
			db.dirties[node.flushNext].flushPrev = common.Hash{}
		case db.newest:
			db.newest = node.flushPrev
			db.dirties[node.flushPrev].flushNext = common.Hash{}
		default:
			db.dirties[node.flushPrev].flushNext = node.flushNext
			db.dirties[node.flushNext].flushPrev = node.flushPrev
		}
		// Dereference all children and delete the node
		node.forChilds(func(hash common.Hash) {
			db.dereference(hash, child)
		})
		delete(db.dirties, child)
		db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
		if node.children != nil {
			db.childrenSize -= cachedNodeChildrenSize
		}
	}
}

// flushItem is used to track all [cachedNode]s that must be written to disk
type flushItem struct {
	hash common.Hash
	node *cachedNode
	rlp  []byte
}

// writeFlushItems writes all items in [toFlush] to disk in batches of
// [ethdb.IdealBatchSize]. This function does not access any variables inside
// of [Database] and does not need to be synchronized.
func (db *Database) writeFlushItems(toFlush []*flushItem) error {
	batch := db.diskdb.NewBatch()
	for _, item := range toFlush {
		rlp := item.node.rlp()
		item.rlp = rlp
		rawdb.WriteTrieNode(batch, item.hash, rlp)

		// If we exceeded the ideal batch size, commit and reset
		if batch.ValueSize() >= ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				log.Error("Failed to write flush list to disk", "err", err)
				return err
			}
			batch.Reset()
		}
	}

	// Flush out any remainder data from the last batch
	if err := batch.Write(); err != nil {
		log.Error("Failed to write flush list to disk", "err", err)
		return err
	}

	return nil
}
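// Illustrative use of the reference machinery above (a hedged sketch, not
// code from this repository): after committing a block's state, the
// blockchain layer typically anchors the new state root against the
// metaroot, and later drops roots that fall out of its in-memory window:
//
//	triedb.Reference(newRoot, common.Hash{}) // pin the fresh root
//	triedb.Dereference(staleRoot)            // cascade-GC an old root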
// Cap iteratively flushes old but still referenced trie nodes until the total
// memory usage goes below the given threshold.
func (db *Database) Cap(limit common.StorageSize) error {
	start := time.Now()
	// If the preimage cache got large enough, push to disk. If it's still small
	// leave for later to deduplicate writes.
	if db.preimages != nil {
		if err := db.preimages.commit(false); err != nil {
			return err
		}
	}

	// It is important that outside code doesn't see an inconsistent state
	// (referenced data removed from memory cache during commit but not yet
	// in persistent storage). This is ensured by only uncaching existing
	// data when the database write finalizes.
	db.lock.RLock()
	lockStart := time.Now()
	nodes, storage := len(db.dirties), db.dirtiesSize

	// db.dirtiesSize only contains the useful data in the cache, but when reporting
	// the total memory consumption, the maintenance metadata also needs to be
	// counted.
	pendingSize := db.dirtiesSize + common.StorageSize((len(db.dirties)-1)*cachedNodeSize)
	pendingSize += db.childrenSize - common.StorageSize(len(db.dirties[common.Hash{}].children)*(common.HashLength+2))
	if pendingSize <= limit {
		db.lock.RUnlock()
		return nil
	}

	// Keep removing nodes from the flush-list until we're below allowance
	toFlush := make([]*flushItem, 0, 128)
	oldest := db.oldest
	for pendingSize > limit && oldest != (common.Hash{}) {
		// Fetch the oldest referenced node and push into the batch
		node := db.dirties[oldest]
		toFlush = append(toFlush, &flushItem{oldest, node, nil})

		// Iterate to the next flush item, or abort if the size cap was achieved. Size
		// is the total size, including the useful cached data (hash -> blob), the
		// cache item metadata, as well as external children mappings.
		pendingSize -= common.StorageSize(common.HashLength + int(node.size) + cachedNodeSize)
		if node.children != nil {
			pendingSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2))
		}
		oldest = node.flushNext
	}
	db.lock.RUnlock()
	lockTime := time.Since(lockStart)

	// Write nodes to disk
	if err := db.writeFlushItems(toFlush); err != nil {
		return err
	}

	// Flush all written items from dirties
	//
	// NOTE: The order of the flushlist may have changed while the lock was not
	// held, so we cannot just iterate to [oldest].
	db.lock.Lock()
	defer db.lock.Unlock()
	lockStart = time.Now()
	for _, item := range toFlush {
		// [item.rlp] is populated in [writeFlushItems]
		db.removeFromDirties(item.hash, item.rlp)
	}
	db.flushnodes += uint64(nodes - len(db.dirties))
	db.flushsize += storage - db.dirtiesSize
	db.flushtime += time.Since(start)

	memcacheDirtySizeGauge.Update(float64(db.dirtiesSize))
	memcacheDirtyChildSizeGauge.Update(float64(db.childrenSize))
	memcacheDirtyNodesGauge.Update(int64(len(db.dirties)))

	memcacheFlushMeter.Mark(1)
	memcacheFlushTimeTimer.Update(time.Since(start))
	memcacheFlushLockTimeTimer.Update(lockTime + time.Since(lockStart))
	memcacheFlushSizeMeter.Mark(int64(storage - db.dirtiesSize))
	memcacheFlushNodesMeter.Mark(int64(nodes - len(db.dirties)))

	log.Debug("Persisted nodes from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start),
		"flushnodes", db.flushnodes, "flushsize", db.flushsize, "flushtime", db.flushtime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)
	return nil
}

// Commit iterates over all the children of a particular node, writes them out
// to disk, forcefully tearing down all references in both directions. As a side
// effect, all pre-images accumulated up to this point are also written.
func (db *Database) Commit(node common.Hash, report bool, callback func(common.Hash)) error {
	start := time.Now()
	if db.preimages != nil {
		if err := db.preimages.commit(true); err != nil {
			return err
		}
	}

	// It is important that outside code doesn't see an inconsistent state (referenced
	// data removed from memory cache during commit but not yet in persistent storage).
	// This is ensured by only uncaching existing data when the database write finalizes.
	db.lock.RLock()
	lockStart := time.Now()
	nodes, storage := len(db.dirties), db.dirtiesSize
	toFlush, err := db.commit(node, make([]*flushItem, 0, 128), callback)
	if err != nil {
		db.lock.RUnlock()
		log.Error("Failed to commit trie from trie database", "err", err)
		return err
	}
	db.lock.RUnlock()
	lockTime := time.Since(lockStart)

	// Write nodes to disk
	if err := db.writeFlushItems(toFlush); err != nil {
		return err
	}

	// Flush all written items from dirties
	db.lock.Lock()
	defer db.lock.Unlock()
	lockStart = time.Now()
	for _, item := range toFlush {
		// [item.rlp] is populated in [writeFlushItems]
		db.removeFromDirties(item.hash, item.rlp)
	}

	memcacheDirtySizeGauge.Update(float64(db.dirtiesSize))
	memcacheDirtyChildSizeGauge.Update(float64(db.childrenSize))
	memcacheDirtyNodesGauge.Update(int64(len(db.dirties)))

	memcacheCommitMeter.Mark(1)
	memcacheCommitTimeTimer.Update(time.Since(start))
	memcacheCommitLockTimeTimer.Update(lockTime + time.Since(lockStart))
	memcacheCommitSizeMeter.Mark(int64(storage - db.dirtiesSize))
	memcacheCommitNodesMeter.Mark(int64(nodes - len(db.dirties)))

	logger := log.Info
	if !report {
		logger = log.Debug
	}
	logger("Persisted trie from memory database", "nodes", nodes-len(db.dirties)+int(db.flushnodes), "size", storage-db.dirtiesSize+db.flushsize, "time", time.Since(start)+db.flushtime,
		"gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)

	// Reset the garbage collection statistics
	db.gcnodes, db.gcsize, db.gctime = 0, 0, 0
	db.flushnodes, db.flushsize, db.flushtime = 0, 0, 0
	return nil
}

// commit is the private locked version of Commit. This function does not
// mutate any data, rather it collects all data that should be committed.
//
// [callback] will be invoked as soon as it is determined a trie node will be
// flushed to disk (before it is actually written).
func (db *Database) commit(hash common.Hash, toFlush []*flushItem, callback func(common.Hash)) ([]*flushItem, error) {
	// If the node does not exist, it's a previously committed node
	node, ok := db.dirties[hash]
	if !ok {
		return toFlush, nil
	}
	var err error
	node.forChilds(func(child common.Hash) {
		if err == nil {
			toFlush, err = db.commit(child, toFlush, callback)
		}
	})
	if err != nil {
		return nil, err
	}
	// By processing the children of each node before the node itself, we ensure
	// that children are committed before their parents (an invariant of this
	// package).
	toFlush = append(toFlush, &flushItem{hash, node, nil})
	if callback != nil {
		callback(hash)
	}
	return toFlush, nil
}
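// Note on the flush pattern shared by Cap and Commit above: the items to
// persist are collected under a read lock, written to disk with no lock held
// (disk I/O is the slow part), and only then uncached under the write lock
// via removeFromDirties, so readers never observe a node that exists in
// neither memory nor persistent storage.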
// removeFromDirties is invoked after database writes and implements dirty data uncaching.
//
// This is the post-processing step of a commit operation where the already persisted trie is
// removed from the dirty cache and moved into the clean cache. The reason behind
// the two-phase commit is to ensure data availability while moving from memory
// to disk.
//
// It is assumed the caller holds [db.lock] when this function is called.
func (db *Database) removeFromDirties(hash common.Hash, rlp []byte) {
	// If the node does not exist, we're done on this path. This could happen if
	// nodes are capped to disk while another thread is committing those same
	// nodes.
	node, ok := db.dirties[hash]
	if !ok {
		return
	}
	// Node still exists, remove it from the flush-list
	switch hash {
	case db.oldest:
		db.oldest = node.flushNext
		db.dirties[node.flushNext].flushPrev = common.Hash{}
	case db.newest:
		db.newest = node.flushPrev
		db.dirties[node.flushPrev].flushNext = common.Hash{}
	default:
		db.dirties[node.flushPrev].flushNext = node.flushNext
		db.dirties[node.flushNext].flushPrev = node.flushPrev
	}
	// Remove the node from the dirty cache
	delete(db.dirties, hash)
	db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
	if node.children != nil {
		db.childrenSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2))
	}
	// Move the flushed node into the clean cache to prevent insta-reloads
	if db.cleans != nil {
		db.cleans.Set(hash[:], rlp)
		memcacheCleanWriteMeter.Mark(int64(len(rlp)))
	}
}

// Update inserts the dirty nodes in the provided nodeset into the database and
// links the account trie with multiple storage tries if necessary.
func (db *Database) Update(nodes *MergedNodeSet) error {
	db.lock.Lock()
	defer db.lock.Unlock()

	return db.update(nodes)
}

// UpdateAndReferenceRoot inserts the dirty nodes in the provided nodeset into
// the database and links the account trie with multiple storage tries if
// necessary, then adds a reference between [root] and the metaroot while
// holding the db's lock.
func (db *Database) UpdateAndReferenceRoot(nodes *MergedNodeSet, root common.Hash) error {
	db.lock.Lock()
	defer db.lock.Unlock()

	if err := db.update(nodes); err != nil {
		return err
	}
	db.reference(root, common.Hash{})
	return nil
}

func (db *Database) update(nodes *MergedNodeSet) error {
	// Insert dirty nodes into the database. In the same tree, it must be
	// ensured that children are inserted first, then parent so that children
	// can be linked with their parent correctly.
	//
	// Note, the storage tries must be flushed before the account trie to
	// retain the invariant that children go into the dirty cache first.
	var order []common.Hash
	for owner := range nodes.sets {
		if owner == (common.Hash{}) {
			continue
		}
		order = append(order, owner)
	}
	if _, ok := nodes.sets[common.Hash{}]; ok {
		order = append(order, common.Hash{})
	}
	for _, owner := range order {
		subset := nodes.sets[owner]
		for _, path := range subset.paths {
			n, ok := subset.nodes[path]
			if !ok {
				return fmt.Errorf("missing node %x %v", owner, path)
			}
			db.insert(n.hash, int(n.size), n.node)
		}
	}
	// Link up the account trie and storage trie if the node points
	// to an account trie leaf.
	if set, present := nodes.sets[common.Hash{}]; present {
		for _, n := range set.leaves {
			var account types.StateAccount
			if err := rlp.DecodeBytes(n.blob, &account); err != nil {
				return err
			}
			if account.Root != emptyRoot {
				db.reference(account.Root, n.parent)
			}
		}
	}
	return nil
}
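// A hedged sketch of feeding a trie commit into the update path. The commit
// signature and the NewMergedNodeSet/Merge helpers shown here are assumptions
// about this fork's trie API, not verbatim from this file:
//
//	root, set := tr.Commit(true) // collect dirty nodes (and leaves)
//	merged := NewMergedNodeSet()
//	_ = merged.Merge(set)        // storage-trie sets would be merged too
//	_ = db.UpdateAndReferenceRoot(merged, root)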
// Size returns the current storage size of the memory cache in front of the
// persistent database layer.
func (db *Database) Size() (common.StorageSize, common.StorageSize) {
	// db.dirtiesSize only contains the useful data in the cache, but when reporting
	// the total memory consumption, the maintenance metadata also needs to be
	// counted.
	db.lock.RLock()
	defer db.lock.RUnlock()
	var metadataSize = common.StorageSize((len(db.dirties) - 1) * cachedNodeSize)
	var metarootRefs = common.StorageSize(len(db.dirties[common.Hash{}].children) * (common.HashLength + 2))
	var preimageSize common.StorageSize
	if db.preimages != nil {
		preimageSize = db.preimages.size()
	}
	return db.dirtiesSize + db.childrenSize + metadataSize - metarootRefs, preimageSize
}

// CommitPreimages flushes the dangling preimages to disk. It is meant to be
// called when closing the blockchain object, so that preimages are persisted
// to the database.
func (db *Database) CommitPreimages() error {
	db.lock.Lock()
	defer db.lock.Unlock()

	if db.preimages == nil {
		return nil
	}
	return db.preimages.commit(true)
}

// saveCache saves the clean state cache to the given directory path
// using the specified number of CPU cores.
func (db *Database) saveCache(dir string, threads int) error {
	if db.cleans == nil {
		return nil
	}
	log.Info("Writing clean trie cache to disk", "path", dir, "threads", threads)

	start := time.Now()
	err := db.cleans.SaveToFileConcurrent(dir, threads)
	if err != nil {
		log.Error("Failed to persist clean trie cache", "error", err)
		return err
	}
	log.Info("Persisted the clean trie cache", "path", dir, "elapsed", common.PrettyDuration(time.Since(start)))
	return nil
}

// SaveCachePeriodically atomically saves fast cache data to the given dir with
// the specified interval. The dump operation will only use a single CPU core.
func (db *Database) SaveCachePeriodically(dir string, interval time.Duration, stopCh <-chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			db.saveCache(dir, 1)
		case <-stopCh:
			return
		}
	}
}
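// Hedged usage sketch for periodic cache persistence (the goroutine and stop
// channel wiring is illustrative, not code from this repository):
//
//	stopCh := make(chan struct{})
//	go triedb.SaveCachePeriodically("/tmp/triecache", 10*time.Minute, stopCh)
//	// ... at shutdown:
//	close(stopCh)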