github.com/dim4egster/coreth@v0.10.2/trie/database.go

// (c) 2020-2022, Ava Labs, Inc.
//
// This file is a derived work, based on the go-ethereum library whose original
// notices appear below.
//
// It is distributed under a license compatible with the licensing terms of the
// original code from which it is derived.
//
// Much love to the original authors for their work.
// **********
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package trie

import (
	"errors"
	"fmt"
	"io"
	"reflect"
	"sync"
	"time"

	"github.com/VictoriaMetrics/fastcache"
	"github.com/dim4egster/coreth/core/rawdb"
	"github.com/dim4egster/coreth/core/types"
	"github.com/dim4egster/coreth/ethdb"
	"github.com/dim4egster/coreth/metrics"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/rlp"
)

var (
	memcacheCleanHitMeter   = metrics.NewRegisteredMeter("trie/memcache/clean/hit", nil)
	memcacheCleanMissMeter  = metrics.NewRegisteredMeter("trie/memcache/clean/miss", nil)
	memcacheCleanReadMeter  = metrics.NewRegisteredMeter("trie/memcache/clean/read", nil)
	memcacheCleanWriteMeter = metrics.NewRegisteredMeter("trie/memcache/clean/write", nil)

	memcacheDirtyHitMeter       = metrics.NewRegisteredMeter("trie/memcache/dirty/hit", nil)
	memcacheDirtyMissMeter      = metrics.NewRegisteredMeter("trie/memcache/dirty/miss", nil)
	memcacheDirtyReadMeter      = metrics.NewRegisteredMeter("trie/memcache/dirty/read", nil)
	memcacheDirtyWriteMeter     = metrics.NewRegisteredMeter("trie/memcache/dirty/write", nil)
	memcacheDirtySizeGauge      = metrics.NewRegisteredGaugeFloat64("trie/memcache/dirty/size", nil)
	memcacheDirtyChildSizeGauge = metrics.NewRegisteredGaugeFloat64("trie/memcache/dirty/childsize", nil)
	memcacheDirtyNodesGauge     = metrics.NewRegisteredGauge("trie/memcache/dirty/nodes", nil)

	memcacheFlushMeter         = metrics.NewRegisteredMeter("trie/memcache/flush/count", nil)
	memcacheFlushTimeTimer     = metrics.NewRegisteredResettingTimer("trie/memcache/flush/time", nil)
	memcacheFlushLockTimeTimer = metrics.NewRegisteredResettingTimer("trie/memcache/flush/locktime", nil)
	memcacheFlushNodesMeter    = metrics.NewRegisteredMeter("trie/memcache/flush/nodes", nil)
	memcacheFlushSizeMeter     = metrics.NewRegisteredMeter("trie/memcache/flush/size", nil)

	memcacheGCTimeTimer  = metrics.NewRegisteredResettingTimer("trie/memcache/gc/time", nil)
	memcacheGCNodesMeter = metrics.NewRegisteredMeter("trie/memcache/gc/nodes", nil)
	memcacheGCSizeMeter  = metrics.NewRegisteredMeter("trie/memcache/gc/size", nil)

	memcacheCommitMeter         = metrics.NewRegisteredMeter("trie/memcache/commit/count", nil)
	memcacheCommitTimeTimer     = metrics.NewRegisteredResettingTimer("trie/memcache/commit/time", nil)
	memcacheCommitLockTimeTimer = metrics.NewRegisteredResettingTimer("trie/memcache/commit/locktime", nil)
	memcacheCommitNodesMeter    = metrics.NewRegisteredMeter("trie/memcache/commit/nodes", nil)
	memcacheCommitSizeMeter     = metrics.NewRegisteredMeter("trie/memcache/commit/size", nil)
)
// Database is an intermediate write layer between the trie data structures and
// the disk database. The aim is to accumulate trie writes in-memory and only
// periodically flush a couple tries to disk, garbage collecting the remainder.
//
// The trie Database is thread-safe both in its mutations and in providing
// individual, independent node access.
type Database struct {
	diskdb ethdb.KeyValueStore // Persistent storage for matured trie nodes

	cleans  *fastcache.Cache            // GC friendly memory cache of clean node RLPs
	dirties map[common.Hash]*cachedNode // Data and references relationships of dirty trie nodes
	oldest  common.Hash                 // Oldest tracked node, flush-list head
	newest  common.Hash                 // Newest tracked node, flush-list tail

	gctime  time.Duration      // Time spent on garbage collection since last commit
	gcnodes uint64             // Nodes garbage collected since last commit
	gcsize  common.StorageSize // Data storage garbage collected since last commit

	flushtime  time.Duration      // Time spent on data flushing since last commit
	flushnodes uint64             // Nodes flushed since last commit
	flushsize  common.StorageSize // Data storage flushed since last commit

	dirtiesSize  common.StorageSize // Storage size of the dirty node cache (exc. metadata)
	childrenSize common.StorageSize // Storage size of the external children tracking
	preimages    *preimageStore     // The store for caching preimages

	lock sync.RWMutex
}
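// Illustrative sketch (added commentary, not part of the upstream file): a
// typical write-layer cycle using only the methods defined below. `triedb`,
// `root` and `staleRoot` are illustrative names:
//
//	// Pin the new root against the metaroot so Cap cannot evict its subtree.
//	triedb.Reference(root, common.Hash{})
//
//	// Either bound memory usage, flushing the oldest nodes to disk...
//	_ = triedb.Cap(128 * 1024 * 1024) // ~128 MB dirty-cache allowance
//
//	// ...or persist a whole trie, e.g. on block acceptance.
//	_ = triedb.Commit(root, false, nil)
//
//	// Drop tries that can no longer become canonical.
//	triedb.Dereference(staleRoot)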
// rawNode is a simple binary blob used to differentiate between collapsed trie
// nodes and already encoded RLP binary blobs (while at the same time storing
// them in the same cache fields).
type rawNode []byte

func (n rawNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") }

func (n rawNode) EncodeRLP(w io.Writer) error {
	_, err := w.Write(n)
	return err
}

// rawFullNode represents only the useful data content of a full node, with the
// caches and flags stripped out to minimize its data storage. This type honors
// the same RLP encoding as the original parent.
type rawFullNode [17]node

func (n rawFullNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawFullNode) fstring(ind string) string { panic("this should never end up in a live trie") }

func (n rawFullNode) EncodeRLP(w io.Writer) error {
	eb := rlp.NewEncoderBuffer(w)
	n.encode(eb)
	return eb.Flush()
}

// rawShortNode represents only the useful data content of a short node, with the
// caches and flags stripped out to minimize its data storage. This type honors
// the same RLP encoding as the original parent.
type rawShortNode struct {
	Key []byte
	Val node
}

func (n rawShortNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawShortNode) fstring(ind string) string { panic("this should never end up in a live trie") }

// cachedNode is all the information we know about a single cached trie node
// in the memory database write layer.
type cachedNode struct {
	node node   // Cached collapsed trie node, or raw rlp data
	size uint16 // Byte size of the useful cached data

	parents  uint32                 // Number of live nodes referencing this one
	children map[common.Hash]uint16 // External children referenced by this node

	flushPrev common.Hash // Previous node in the flush-list
	flushNext common.Hash // Next node in the flush-list
}

// cachedNodeSize is the raw size of a cachedNode data structure without any
// node data included. It's an approximate size, but should be a lot better
// than not counting them.
var cachedNodeSize = int(reflect.TypeOf(cachedNode{}).Size())

// cachedNodeChildrenSize is the raw size of an initialized but empty external
// reference map.
const cachedNodeChildrenSize = 48

// rlp returns the raw rlp encoded blob of the cached trie node, either directly
// from the cache, or by regenerating it from the collapsed node.
func (n *cachedNode) rlp() []byte {
	if node, ok := n.node.(rawNode); ok {
		return node
	}
	return nodeToBytes(n.node)
}

// obj returns the decoded and expanded trie node, either directly from the cache,
// or by regenerating it from the rlp encoded blob.
func (n *cachedNode) obj(hash common.Hash) node {
	if node, ok := n.node.(rawNode); ok {
		// The raw-blob format nodes are loaded either from the
		// clean cache or the database; they each have their own
		// copy and are safe to decode with the unsafe decoder.
		return mustDecodeNodeUnsafe(hash[:], node)
	}
	return expandNode(hash[:], n.node)
}

// forChilds invokes the callback for all the tracked children of this node,
// both the implicit ones from inside the node as well as the explicit ones
// from outside the node.
func (n *cachedNode) forChilds(onChild func(hash common.Hash)) {
	for child := range n.children {
		onChild(child)
	}
	if _, ok := n.node.(rawNode); !ok {
		forGatherChildren(n.node, onChild)
	}
}

// forGatherChildren traverses the node hierarchy of a collapsed storage node and
// invokes the callback for all the hashnode children.
func forGatherChildren(n node, onChild func(hash common.Hash)) {
	switch n := n.(type) {
	case *rawShortNode:
		forGatherChildren(n.Val, onChild)
	case rawFullNode:
		for i := 0; i < 16; i++ {
			forGatherChildren(n[i], onChild)
		}
	case hashNode:
		onChild(common.BytesToHash(n))
	case valueNode, nil, rawNode:
	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}

// simplifyNode traverses the hierarchy of an expanded memory node and discards
// all the internal caches, returning a node that only contains the raw data.
func simplifyNode(n node) node {
	switch n := n.(type) {
	case *shortNode:
		// Short nodes discard the flags and cascade
		return &rawShortNode{Key: n.Key, Val: simplifyNode(n.Val)}

	case *fullNode:
		// Full nodes discard the flags and cascade
		node := rawFullNode(n.Children)
		for i := 0; i < len(node); i++ {
			if node[i] != nil {
				node[i] = simplifyNode(node[i])
			}
		}
		return node

	case valueNode, hashNode, rawNode:
		return n

	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}

// expandNode traverses the node hierarchy of a collapsed storage node and converts
// all fields and keys into expanded memory form.
func expandNode(hash hashNode, n node) node {
	switch n := n.(type) {
	case *rawShortNode:
		// Short nodes need key and child expansion
		return &shortNode{
			Key: compactToHex(n.Key),
			Val: expandNode(nil, n.Val),
			flags: nodeFlag{
				hash: hash,
			},
		}

	case rawFullNode:
		// Full nodes need child expansion
		node := &fullNode{
			flags: nodeFlag{
				hash: hash,
			},
		}
		for i := 0; i < len(node.Children); i++ {
			if n[i] != nil {
				node.Children[i] = expandNode(nil, n[i])
			}
		}
		return node

	case valueNode, hashNode:
		return n

	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}
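// Illustrative note (added commentary): simplifyNode and expandNode convert
// between the lean storage representation (raw*Node values without flags,
// short-node keys in compact encoding) and the expanded in-memory one
// (shortNode/fullNode with nodeFlag and hex-nibble keys). A minimal round-trip
// sketch, assuming `n` is a collapsed node about to be cached and `h` its hash:
//
//	stored := simplifyNode(n)                   // strip caches/flags for the dirty cache
//	inMem := expandNode(hashNode(h[:]), stored) // rebuild flags and hex keys on read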
// Config defines all necessary options for database.
type Config struct {
	Cache     int  // Memory allowance (MB) to use for caching trie nodes in memory
	Preimages bool // Flag whether the preimage of trie key is recorded
}

// NewDatabase creates a new trie database to store ephemeral trie content before
// it's written out to disk or garbage collected. No read cache is created, so all
// data retrievals will hit the underlying disk database.
func NewDatabase(diskdb ethdb.KeyValueStore) *Database {
	return NewDatabaseWithConfig(diskdb, nil)
}

// NewDatabaseWithConfig creates a new trie database to store ephemeral trie content
// before it's written out to disk or garbage collected. It also acts as a read cache
// for nodes loaded from disk.
func NewDatabaseWithConfig(diskdb ethdb.KeyValueStore, config *Config) *Database {
	var cleans *fastcache.Cache
	if config != nil && config.Cache > 0 {
		cleans = fastcache.New(config.Cache * 1024 * 1024)
	}
	var preimage *preimageStore
	if config != nil && config.Preimages {
		preimage = newPreimageStore(diskdb)
	}
	db := &Database{
		diskdb: diskdb,
		cleans: cleans,
		dirties: map[common.Hash]*cachedNode{{}: {
			children: make(map[common.Hash]uint16),
		}},
		preimages: preimage,
	}
	return db
}

// DiskDB retrieves the persistent storage backing the trie database.
func (db *Database) DiskDB() ethdb.KeyValueStore {
	return db.diskdb
}
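// Illustrative sketch (added commentary): constructing the database over an
// in-memory backend, e.g. for tests. rawdb.NewMemoryDatabase is assumed to be
// available in this fork, mirroring upstream go-ethereum:
//
//	disk := rawdb.NewMemoryDatabase()
//	triedb := NewDatabaseWithConfig(disk, &Config{
//		Cache:     256,  // 256 MB fastcache for clean node RLPs
//		Preimages: true, // also record trie-key preimages
//	})
//	_ = triedb.DiskDB() // returns disk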
// insert inserts a simplified trie node into the memory database.
// All nodes inserted by this function will be reference tracked
// and in theory should only be used for **trie node** insertion.
func (db *Database) insert(hash common.Hash, size int, node node) {
	// If the node's already cached, skip
	if _, ok := db.dirties[hash]; ok {
		return
	}
	memcacheDirtyWriteMeter.Mark(int64(size))

	// Create the cached entry for this node
	entry := &cachedNode{
		node:      node,
		size:      uint16(size),
		flushPrev: db.newest,
	}
	entry.forChilds(func(child common.Hash) {
		if c := db.dirties[child]; c != nil {
			c.parents++
		}
	})
	db.dirties[hash] = entry

	// Update the flush-list endpoints
	if db.oldest == (common.Hash{}) {
		db.oldest, db.newest = hash, hash
	} else {
		db.dirties[db.newest].flushNext, db.newest = hash, hash
	}
	db.dirtiesSize += common.StorageSize(common.HashLength + entry.size)
}

// RawNode retrieves an encoded cached trie node from memory. If it cannot be found
// cached, the method queries the persistent database for the content. This function
// will not return the metaroot.
func (db *Database) RawNode(h common.Hash) ([]byte, error) {
	if h == (common.Hash{}) {
		return nil, errors.New("not found")
	}
	enc, cn, err := db.node(h)
	if err != nil {
		return nil, err
	}
	if len(enc) > 0 {
		return enc, nil
	}
	return cn.rlp(), nil
}

// EncodedNode returns a formatted [node] when given a node hash. If no node
// exists, nil is returned. This function will return the metaroot.
func (db *Database) EncodedNode(h common.Hash) node {
	enc, cn, err := db.node(h)
	if err != nil {
		return nil
	}
	if len(enc) > 0 {
		return mustDecodeNode(h[:], enc)
	}
	return cn.obj(h)
}

// node retrieves an encoded cached trie node from memory. If it cannot be found
// cached, the method queries the persistent database for the content.
//
// We do not return a single node representation to avoid useless
// encoding/decoding depending on the caller.
func (db *Database) node(hash common.Hash) ([]byte, *cachedNode, error) {
	// Retrieve the node from the clean cache if available
	if db.cleans != nil {
		if enc := db.cleans.Get(nil, hash[:]); enc != nil {
			memcacheCleanHitMeter.Mark(1)
			memcacheCleanReadMeter.Mark(int64(len(enc)))
			return enc, nil, nil
		}
	}
	// Retrieve the node from the dirty cache if available
	db.lock.RLock()
	dirty := db.dirties[hash]
	db.lock.RUnlock()

	if dirty != nil {
		memcacheDirtyHitMeter.Mark(1)
		memcacheDirtyReadMeter.Mark(int64(dirty.size))
		return nil, dirty, nil
	}
	memcacheDirtyMissMeter.Mark(1)

	// Content unavailable in memory, attempt to retrieve from disk
	enc := rawdb.ReadTrieNode(db.diskdb, hash)
	if len(enc) != 0 {
		if db.cleans != nil {
			db.cleans.Set(hash[:], enc)
			memcacheCleanMissMeter.Mark(1)
			memcacheCleanWriteMeter.Mark(int64(len(enc)))
		}
		return enc, nil, nil
	}
	return nil, nil, errors.New("not found")
}
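// Illustrative sketch (added commentary): node reads fall through three tiers,
// clean cache -> dirty cache -> disk, as implemented by node above. Callers
// that only need the RLP blob use RawNode; callers that need the decoded form
// use EncodedNode. Assuming a known node hash `h`:
//
//	if blob, err := triedb.RawNode(h); err == nil {
//		_ = blob // raw RLP, served from whichever tier hit first
//	}
//	if n := triedb.EncodedNode(h); n != nil {
//		_ = n // decoded node; also resolves the metaroot, unlike RawNode
//	}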
// Nodes retrieves the hashes of all the nodes cached within the memory database.
// This method is extremely expensive and should only be used to validate internal
// states in test code.
func (db *Database) Nodes() []common.Hash {
	db.lock.RLock()
	defer db.lock.RUnlock()

	var hashes = make([]common.Hash, 0, len(db.dirties))
	for hash := range db.dirties {
		if hash != (common.Hash{}) { // Special case for "root" references/nodes
			hashes = append(hashes, hash)
		}
	}
	return hashes
}

// Reference adds a new reference from a parent node to a child node.
// This function is used to add a reference between an internal trie node
// and an external node (e.g. a storage trie root); all internal trie nodes
// are referenced together by the database itself.
func (db *Database) Reference(child common.Hash, parent common.Hash) {
	db.lock.Lock()
	defer db.lock.Unlock()

	db.reference(child, parent)
}

func (db *Database) reference(child common.Hash, parent common.Hash) {
	// If the node does not exist, it's a node pulled from disk, skip
	node, ok := db.dirties[child]
	if !ok {
		return
	}
	// If the reference already exists, only duplicate for roots
	if db.dirties[parent].children == nil {
		db.dirties[parent].children = make(map[common.Hash]uint16)
		db.childrenSize += cachedNodeChildrenSize
	} else if _, ok = db.dirties[parent].children[child]; ok && parent != (common.Hash{}) {
		return
	}
	node.parents++
	db.dirties[parent].children[child]++
	if db.dirties[parent].children[child] == 1 {
		db.childrenSize += common.HashLength + 2 // uint16 counter
	}
}

// Dereference removes an existing reference from a root node.
func (db *Database) Dereference(root common.Hash) {
	// Sanity check to ensure that the meta-root is not removed
	if root == (common.Hash{}) {
		log.Error("Attempted to dereference the trie cache meta root")
		return
	}

	db.lock.Lock()
	defer db.lock.Unlock()
	nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
	db.dereference(root, common.Hash{})

	db.gcnodes += uint64(nodes - len(db.dirties))
	db.gcsize += storage - db.dirtiesSize
	db.gctime += time.Since(start)

	memcacheDirtySizeGauge.Update(float64(db.dirtiesSize))
	memcacheDirtyChildSizeGauge.Update(float64(db.childrenSize))
	memcacheDirtyNodesGauge.Update(int64(len(db.dirties)))

	memcacheGCTimeTimer.Update(time.Since(start))
	memcacheGCSizeMeter.Mark(int64(storage - db.dirtiesSize))
	memcacheGCNodesMeter.Mark(int64(nodes - len(db.dirties)))

	log.Debug("Dereferenced trie from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start),
		"gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)
}
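// Illustrative sketch (added commentary): external reference counting in
// practice. Each processed block typically pins its state root against the
// metaroot and unpins roots that fall out of the reorg window; dropping the
// last reference garbage-collects the whole unreferenced subtree from the
// dirty cache. The hash names below are purely illustrative:
//
//	triedb.Reference(newStateRoot, common.Hash{})  // pin against the metaroot
//	triedb.Reference(storageRoot, accountNodeHash) // link storage trie to account trie
//	// ...later, when the trie can no longer become canonical:
//	triedb.Dereference(obsoleteStateRoot) // cascades through unreferenced children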
// dereference is the private locked version of Dereference.
func (db *Database) dereference(child common.Hash, parent common.Hash) {
	// Dereference the parent-child
	node := db.dirties[parent]

	if node.children != nil && node.children[child] > 0 {
		node.children[child]--
		if node.children[child] == 0 {
			delete(node.children, child)
			db.childrenSize -= (common.HashLength + 2) // uint16 counter
		}
	}
	// If the child does not exist, it's a previously committed node.
	node, ok := db.dirties[child]
	if !ok {
		return
	}
	// If there are no more references to the child, delete it and cascade
	if node.parents > 0 {
		// This is a special cornercase where a node loaded from disk (i.e. not in the
		// memcache any more) gets reinjected as a new node (short node split into full,
		// then reverted into short), causing a cached node to have no parents. That is
		// no problem in itself, but don't make maxint parents out of it.
		node.parents--
	}
	if node.parents == 0 {
		// Remove the node from the flush-list
		switch child {
		case db.oldest:
			db.oldest = node.flushNext
			db.dirties[node.flushNext].flushPrev = common.Hash{}
		case db.newest:
			db.newest = node.flushPrev
			db.dirties[node.flushPrev].flushNext = common.Hash{}
		default:
			db.dirties[node.flushPrev].flushNext = node.flushNext
			db.dirties[node.flushNext].flushPrev = node.flushPrev
		}
		// Dereference all children and delete the node
		node.forChilds(func(hash common.Hash) {
			db.dereference(hash, child)
		})
		delete(db.dirties, child)
		db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
		if node.children != nil {
			db.childrenSize -= cachedNodeChildrenSize
		}
	}
}

// flushItem is used to track all [cachedNode]s that must be written to disk
type flushItem struct {
	hash common.Hash
	node *cachedNode
	rlp  []byte
}

// writeFlushItems writes all items in [toFlush] to disk in batches of
// [ethdb.IdealBatchSize]. This function does not access any variables inside
// of [Database] and does not need to be synchronized.
func (db *Database) writeFlushItems(toFlush []*flushItem) error {
	batch := db.diskdb.NewBatch()
	for _, item := range toFlush {
		rlp := item.node.rlp()
		item.rlp = rlp
		rawdb.WriteTrieNode(batch, item.hash, rlp)

		// If we exceeded the ideal batch size, commit and reset
		if batch.ValueSize() >= ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				log.Error("Failed to write flush list to disk", "err", err)
				return err
			}
			batch.Reset()
		}
	}

	// Flush out any remainder data from the last batch
	if err := batch.Write(); err != nil {
		log.Error("Failed to write flush list to disk", "err", err)
		return err
	}

	return nil
}
// Cap iteratively flushes old but still referenced trie nodes until the total
// memory usage goes below the given threshold.
func (db *Database) Cap(limit common.StorageSize) error {
	start := time.Now()
	// If the preimage cache got large enough, push to disk. If it's still small
	// leave for later to deduplicate writes.
	if db.preimages != nil {
		if err := db.preimages.commit(false); err != nil {
			return err
		}
	}

	// It is important that outside code doesn't see an inconsistent state
	// (referenced data removed from memory cache during commit but not yet
	// in persistent storage). This is ensured by only uncaching existing
	// data when the database write finalizes.
	db.lock.RLock()
	lockStart := time.Now()
	nodes, storage := len(db.dirties), db.dirtiesSize

	// db.dirtiesSize only contains the useful data in the cache, but when reporting
	// the total memory consumption, the maintenance metadata also needs to be
	// counted.
	pendingSize := db.dirtiesSize + common.StorageSize((len(db.dirties)-1)*cachedNodeSize)
	pendingSize += db.childrenSize - common.StorageSize(len(db.dirties[common.Hash{}].children)*(common.HashLength+2))
	if pendingSize <= limit {
		db.lock.RUnlock()
		return nil
	}

	// Keep removing nodes from the flush-list until we're below allowance
	toFlush := make([]*flushItem, 0, 128)
	oldest := db.oldest
	for pendingSize > limit && oldest != (common.Hash{}) {
		// Fetch the oldest referenced node and push into the batch
		node := db.dirties[oldest]
		toFlush = append(toFlush, &flushItem{oldest, node, nil})

		// Iterate to the next flush item, or abort if the size cap was achieved. Size
		// is the total size, including the useful cached data (hash -> blob), the
		// cache item metadata, as well as external children mappings.
		pendingSize -= common.StorageSize(common.HashLength + int(node.size) + cachedNodeSize)
		if node.children != nil {
			pendingSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2))
		}
		oldest = node.flushNext
	}
	db.lock.RUnlock()
	lockTime := time.Since(lockStart)

	// Write nodes to disk
	if err := db.writeFlushItems(toFlush); err != nil {
		return err
	}

	// Flush all written items from dirties
	//
	// NOTE: The order of the flushlist may have changed while the lock was not
	// held, so we cannot just iterate to [oldest].
	db.lock.Lock()
	defer db.lock.Unlock()
	lockStart = time.Now()
	for _, item := range toFlush {
		// [item.rlp] is populated in [writeFlushItems]
		db.removeFromDirties(item.hash, item.rlp)
	}
	db.flushnodes += uint64(nodes - len(db.dirties))
	db.flushsize += storage - db.dirtiesSize
	db.flushtime += time.Since(start)

	memcacheDirtySizeGauge.Update(float64(db.dirtiesSize))
	memcacheDirtyChildSizeGauge.Update(float64(db.childrenSize))
	memcacheDirtyNodesGauge.Update(int64(len(db.dirties)))

	memcacheFlushMeter.Mark(1)
	memcacheFlushTimeTimer.Update(time.Since(start))
	memcacheFlushLockTimeTimer.Update(lockTime + time.Since(lockStart))
	memcacheFlushSizeMeter.Mark(int64(storage - db.dirtiesSize))
	memcacheFlushNodesMeter.Mark(int64(nodes - len(db.dirties)))

	log.Debug("Persisted nodes from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start),
		"flushnodes", db.flushnodes, "flushsize", db.flushsize, "flushtime", db.flushtime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)
	return nil
}
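// Illustrative sketch (added commentary): Cap is the backpressure valve of the
// write layer. A caller that wants to keep the dirty cache near a fixed budget
// can check Size (first return value: dirty-node data plus metadata) and flush
// the overflow. The threshold below is chosen arbitrarily:
//
//	const dirtyLimit = 64 * 1024 * 1024 // 64 MB, illustrative
//	if dirty, _ := triedb.Size(); dirty > dirtyLimit {
//		if err := triedb.Cap(dirtyLimit * 9 / 10); err != nil {
//			log.Error("Failed to cap trie database", "err", err)
//		}
//	}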
// Commit iterates over all the children of a particular node, writes them out
// to disk, forcefully tearing down all references in both directions. As a side
// effect, all pre-images accumulated up to this point are also written.
func (db *Database) Commit(node common.Hash, report bool, callback func(common.Hash)) error {
	start := time.Now()
	if db.preimages != nil {
		if err := db.preimages.commit(true); err != nil {
			return err
		}
	}

	// It is important that outside code doesn't see an inconsistent state (referenced
	// data removed from memory cache during commit but not yet in persistent storage).
	// This is ensured by only uncaching existing data when the database write finalizes.
	db.lock.RLock()
	lockStart := time.Now()
	nodes, storage := len(db.dirties), db.dirtiesSize
	toFlush, err := db.commit(node, make([]*flushItem, 0, 128), callback)
	if err != nil {
		log.Error("Failed to commit trie from trie database", "err", err)
		return err
	}
	db.lock.RUnlock()
	lockTime := time.Since(lockStart)

	// Write nodes to disk
	if err := db.writeFlushItems(toFlush); err != nil {
		return err
	}

	// Flush all written items from dirties
	db.lock.Lock()
	defer db.lock.Unlock()
	lockStart = time.Now()
	for _, item := range toFlush {
		// [item.rlp] is populated in [writeFlushItems]
		db.removeFromDirties(item.hash, item.rlp)
	}

	memcacheDirtySizeGauge.Update(float64(db.dirtiesSize))
	memcacheDirtyChildSizeGauge.Update(float64(db.childrenSize))
	memcacheDirtyNodesGauge.Update(int64(len(db.dirties)))

	memcacheCommitMeter.Mark(1)
	memcacheCommitTimeTimer.Update(time.Since(start))
	memcacheCommitLockTimeTimer.Update(lockTime + time.Since(lockStart))
	memcacheCommitSizeMeter.Mark(int64(storage - db.dirtiesSize))
	memcacheCommitNodesMeter.Mark(int64(nodes - len(db.dirties)))

	logger := log.Info
	if !report {
		logger = log.Debug
	}
	logger("Persisted trie from memory database", "nodes", nodes-len(db.dirties)+int(db.flushnodes), "size", storage-db.dirtiesSize+db.flushsize, "time", time.Since(start)+db.flushtime,
		"gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)

	// Reset the garbage collection statistics
	db.gcnodes, db.gcsize, db.gctime = 0, 0, 0
	db.flushnodes, db.flushsize, db.flushtime = 0, 0, 0
	return nil
}

// commit is the private locked version of Commit. This function does not
// mutate any data, rather it collects all data that should be committed.
//
// [callback] will be invoked as soon as it is determined a trie node will be
// flushed to disk (before it is actually written).
func (db *Database) commit(hash common.Hash, toFlush []*flushItem, callback func(common.Hash)) ([]*flushItem, error) {
	// If the node does not exist, it's a previously committed node
	node, ok := db.dirties[hash]
	if !ok {
		return toFlush, nil
	}
	var err error
	node.forChilds(func(child common.Hash) {
		if err == nil {
			toFlush, err = db.commit(child, toFlush, callback)
		}
	})
	if err != nil {
		return nil, err
	}
	// By processing the children of each node before the node itself, we ensure
	// that children are committed before their parents (an invariant of this
	// package).
	toFlush = append(toFlush, &flushItem{hash, node, nil})
	if callback != nil {
		callback(hash)
	}
	return toFlush, nil
}
// removeFromDirties is invoked after database writes and implements dirty data uncaching.
//
// This is the post-processing step of a commit operation where the already persisted trie is
// removed from the dirty cache and moved into the clean cache. The reason behind
// the two-phase commit is to ensure data availability while moving from memory
// to disk.
//
// It is assumed the caller holds [db.lock] for writing when this function is
// called.
func (db *Database) removeFromDirties(hash common.Hash, rlp []byte) {
	// If the node does not exist, we're done on this path. This could happen if
	// nodes are capped to disk while another thread is committing those same
	// nodes.
	node, ok := db.dirties[hash]
	if !ok {
		return
	}
	// Node still exists, remove it from the flush-list
	switch hash {
	case db.oldest:
		db.oldest = node.flushNext
		db.dirties[node.flushNext].flushPrev = common.Hash{}
	case db.newest:
		db.newest = node.flushPrev
		db.dirties[node.flushPrev].flushNext = common.Hash{}
	default:
		db.dirties[node.flushPrev].flushNext = node.flushNext
		db.dirties[node.flushNext].flushPrev = node.flushPrev
	}
	// Remove the node from the dirty cache
	delete(db.dirties, hash)
	db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
	if node.children != nil {
		db.childrenSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2))
	}
	// Move the flushed node into the clean cache to prevent insta-reloads
	if db.cleans != nil {
		db.cleans.Set(hash[:], rlp)
		memcacheCleanWriteMeter.Mark(int64(len(rlp)))
	}
}

// Update inserts the dirty nodes in provided nodeset into database and
// links the account trie with multiple storage tries if necessary.
func (db *Database) Update(nodes *MergedNodeSet) error {
	db.lock.Lock()
	defer db.lock.Unlock()

	return db.update(nodes)
}

// UpdateAndReferenceRoot inserts the dirty nodes in provided nodeset into
// database and links the account trie with multiple storage tries if necessary,
// then adds a reference from [root] to the metaroot while holding the db's lock.
func (db *Database) UpdateAndReferenceRoot(nodes *MergedNodeSet, root common.Hash) error {
	db.lock.Lock()
	defer db.lock.Unlock()

	if err := db.update(nodes); err != nil {
		return err
	}
	db.reference(root, common.Hash{})
	return nil
}

func (db *Database) update(nodes *MergedNodeSet) error {
	// Insert dirty nodes into the database. In the same tree, it must be
	// ensured that children are inserted first, then the parent, so that
	// children can be linked with their parent correctly.
	//
	// Note, the storage tries must be flushed before the account trie to
	// retain the invariant that children go into the dirty cache first.
	var order []common.Hash
	for owner := range nodes.sets {
		if owner == (common.Hash{}) {
			continue
		}
		order = append(order, owner)
	}
	if _, ok := nodes.sets[common.Hash{}]; ok {
		order = append(order, common.Hash{})
	}
	for _, owner := range order {
		subset := nodes.sets[owner]
		for _, path := range subset.paths {
			n, ok := subset.nodes[path]
			if !ok {
				return fmt.Errorf("missing node %x %v", owner, path)
			}
			db.insert(n.hash, int(n.size), n.node)
		}
	}
	// Link up the account trie and storage trie if the node points
	// to an account trie leaf.
	if set, present := nodes.sets[common.Hash{}]; present {
		for _, n := range set.leaves {
			var account types.StateAccount
			if err := rlp.DecodeBytes(n.blob, &account); err != nil {
				return err
			}
			if account.Root != emptyRoot {
				db.reference(account.Root, n.parent)
			}
		}
	}
	return nil
}
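// Illustrative sketch (added commentary): wiring trie commits into the
// database. After committing a trie, the resulting node set is merged and
// handed to Update (or UpdateAndReferenceRoot, which also pins the new root
// under the same lock). NewMergedNodeSet and Merge are assumed to match
// upstream go-ethereum's trie package; `set` is the *NodeSet produced by
// committing a trie (that method's signature varies between versions, so it
// is elided here):
//
//	merged := NewMergedNodeSet()
//	if err := merged.Merge(set); err != nil {
//		return err
//	}
//	if err := triedb.UpdateAndReferenceRoot(merged, root); err != nil {
//		return err
//	}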
// Size returns the current storage size of the memory cache in front of the
// persistent database layer.
func (db *Database) Size() (common.StorageSize, common.StorageSize) {
	// db.dirtiesSize only contains the useful data in the cache, but when reporting
	// the total memory consumption, the maintenance metadata also needs to be
	// counted.
	db.lock.RLock()
	defer db.lock.RUnlock()
	var metadataSize = common.StorageSize((len(db.dirties) - 1) * cachedNodeSize)
	var metarootRefs = common.StorageSize(len(db.dirties[common.Hash{}].children) * (common.HashLength + 2))
	var preimageSize common.StorageSize
	if db.preimages != nil {
		preimageSize = db.preimages.size()
	}
	return db.dirtiesSize + db.childrenSize + metadataSize - metarootRefs, preimageSize
}

// CommitPreimages flushes the dangling preimages to disk. It is meant to be
// called when closing the blockchain object, so that preimages are persisted
// to the database.
func (db *Database) CommitPreimages() error {
	db.lock.Lock()
	defer db.lock.Unlock()

	if db.preimages == nil {
		return nil
	}
	return db.preimages.commit(true)
}
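// Illustrative sketch (added commentary): on shutdown, any preimages that have
// accumulated since the last Commit (which already flushes them as a side
// effect) can be persisted explicitly. `triedb` is illustrative:
//
//	if err := triedb.CommitPreimages(); err != nil {
//		log.Error("Failed to persist trie preimages", "err", err)
//	}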