github.com/klaytn/klaytn@v1.12.1/storage/statedb/database.go

// Modifications Copyright 2018 The klaytn Authors
// Copyright 2015 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
//
// This file is derived from trie/database.go (2018/06/04).
// Modified and improved for the klaytn development.

package statedb

import (
	"errors"
	"fmt"
	"io"
	"math/rand"
	"sync"
	"time"

	"github.com/klaytn/klaytn/common"
	"github.com/klaytn/klaytn/log"
	"github.com/klaytn/klaytn/rlp"
	"github.com/klaytn/klaytn/storage/database"
	"github.com/pbnjay/memory"
	"github.com/rcrowley/go-metrics"
)

var (
	logger = log.NewModuleLogger(log.StorageStateDB)

	// metrics for Cap state
	memcacheFlushTimeGauge  = metrics.NewRegisteredGauge("trie/memcache/flush/time", nil)
	memcacheFlushNodesGauge = metrics.NewRegisteredGauge("trie/memcache/flush/nodes", nil)
	memcacheFlushSizeGauge  = metrics.NewRegisteredGauge("trie/memcache/flush/size", nil)

	// metrics for GC
	memcacheGCTimeGauge  = metrics.NewRegisteredGauge("trie/memcache/gc/time", nil)
	memcacheGCNodesMeter = metrics.NewRegisteredMeter("trie/memcache/gc/nodes", nil)
	memcacheGCSizeMeter  = metrics.NewRegisteredMeter("trie/memcache/gc/size", nil)

	// metrics for commit state
	memcacheCommitTimeGauge  = metrics.NewRegisteredGauge("trie/memcache/commit/time", nil)
	memcacheCommitNodesMeter = metrics.NewRegisteredMeter("trie/memcache/commit/nodes", nil)
	memcacheCommitSizeMeter  = metrics.NewRegisteredMeter("trie/memcache/commit/size", nil)
	memcacheUncacheTimeGauge = metrics.NewRegisteredGauge("trie/memcache/uncache/time", nil)

	// metrics for state trie cache db
	memcacheCleanHitMeter          = metrics.NewRegisteredMeter("trie/memcache/clean/hit", nil)
	memcacheCleanMissMeter         = metrics.NewRegisteredMeter("trie/memcache/clean/miss", nil)
	memcacheCleanPrefetchMissMeter = metrics.NewRegisteredMeter("trie/memcache/clean/prefetch/miss", nil)
	memcacheCleanReadMeter         = metrics.NewRegisteredMeter("trie/memcache/clean/read", nil)
	memcacheCleanWriteMeter        = metrics.NewRegisteredMeter("trie/memcache/clean/write", nil)

	// metric of total node number
	memcacheNodesGauge = metrics.NewRegisteredGauge("trie/memcache/nodes", nil)
)

// commitResultChSizeLimit limits the size of the channel used for commitResult.
const commitResultChSizeLimit = 100 * 10000

// AutoScaling is for auto-scaling the cache size. If cacheSize is set to this
// value, the cache size is scaled to the physical memory.
const AutoScaling = -1
type DatabaseReader interface {
	// Get retrieves the value associated with key from the database.
	Get(key []byte) (value []byte, err error)

	// Has reports whether a key is present in the database.
	Has(key []byte) (bool, error)
}

// Database is an intermediate write layer between the trie data structures and
// the disk database. The aim is to accumulate trie writes in-memory and only
// periodically flush a couple of tries to disk, garbage collecting the remainder.
type Database struct {
	diskDB database.DBManager // Persistent storage for matured trie nodes

	nodes  map[common.ExtHash]*cachedNode // Data and references relationships of a trie node
	oldest common.ExtHash                 // Oldest tracked node, flush-list head
	newest common.ExtHash                 // Newest tracked node, flush-list tail

	preimages    map[common.Hash][]byte // Preimages of nodes from the secure trie
	pruningMarks []database.PruningMark // Trie node pruning marks from the pruning trie

	gctime  time.Duration      // Time spent on garbage collection since last commit
	gcnodes uint64             // Nodes garbage collected since last commit
	gcsize  common.StorageSize // Data storage garbage collected since last commit
	gcLock  sync.RWMutex       // Lock preventing garbage collection of a cachedNode before it is flushed

	flushtime  time.Duration      // Time spent on data flushing since last commit
	flushnodes uint64             // Nodes flushed since last commit
	flushsize  common.StorageSize // Data storage flushed since last commit

	nodesSize     common.StorageSize // Storage size of the nodes cache
	preimagesSize common.StorageSize // Storage size of the preimages cache

	lock sync.RWMutex

	trieNodeCache                TrieNodeCache        // GC friendly memory cache of trie node RLPs
	trieNodeCacheConfig          *TrieNodeCacheConfig // Configuration of trieNodeCache
	savingTrieNodeCacheTriggered bool                 // Whether saving the trie node cache has been triggered
}
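// The typical lifecycle around this write layer: commit a trie into the
// Database, anchor its root with ReferenceRoot, and later either persist it
// with Commit or drop it with Dereference once the block goes out of scope.
// The function below is an illustrative sketch, not part of the original file;
// it assumes a *Database built elsewhere, and the commit interval of 128
// blocks is a hypothetical policy.
func exampleLifecycle(db *Database, root common.Hash, blockNum uint64) error {
	// Pin the freshly written state trie so garbage collection keeps it alive.
	db.ReferenceRoot(root)

	if blockNum%128 == 0 {
		// Periodically persist the referenced trie to disk.
		return db.Commit(root, true, blockNum)
	}
	// Otherwise release the root again once it is no longer needed,
	// letting the unreferenced nodes be garbage collected in memory.
	db.Dereference(root)
	return nil
}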
// rawNode is a simple binary blob used to differentiate between collapsed trie
// nodes and already encoded RLP binary blobs (while at the same time storing them
// in the same cache fields).
type rawNode []byte

func (n rawNode) canUnload(uint16, uint16) bool { panic("this should never end up in a live trie") }
func (n rawNode) cache() (hashNode, bool)       { panic("this should never end up in a live trie") }
func (n rawNode) fstring(ind string) string     { panic("this should never end up in a live trie") }
func (n rawNode) lenEncoded() uint16            { panic("this should never end up in a live trie") }

func (n rawNode) EncodeRLP(w io.Writer) error {
	_, err := w.Write([]byte(n))
	return err
}

// rawFullNode represents only the useful data content of a full node, with the
// caches and flags stripped out to minimize its data storage. This type honors
// the same RLP encoding as the original parent.
type rawFullNode [17]node

func (n rawFullNode) canUnload(uint16, uint16) bool { panic("this should never end up in a live trie") }
func (n rawFullNode) cache() (hashNode, bool)       { panic("this should never end up in a live trie") }
func (n rawFullNode) fstring(ind string) string     { panic("this should never end up in a live trie") }
func (n rawFullNode) lenEncoded() uint16            { panic("this should never end up in a live trie") }

func (n rawFullNode) EncodeRLP(w io.Writer) error {
	var nodes [17]node

	for i, child := range n {
		if child != nil {
			nodes[i] = child
		} else {
			nodes[i] = nilValueNode
		}
	}
	return rlp.Encode(w, nodes)
}

// rawShortNode represents only the useful data content of a short node, with the
// caches and flags stripped out to minimize its data storage. This type honors
// the same RLP encoding as the original parent.
type rawShortNode struct {
	Key []byte
	Val node
}

func (n rawShortNode) canUnload(uint16, uint16) bool {
	panic("this should never end up in a live trie")
}
func (n rawShortNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawShortNode) fstring(ind string) string { panic("this should never end up in a live trie") }
func (n rawShortNode) lenEncoded() uint16        { panic("this should never end up in a live trie") }

// cachedNode is all the information we know about a single cached trie node
// in the memory database write layer.
type cachedNode struct {
	node node // Cached collapsed trie node, or raw rlp data
	// TODO-Klaytn: need to change the data type of this if we increase the code size limit
	size uint16 // Byte size of the useful cached data

	parents  uint64                    // Number of live nodes referencing this one
	children map[common.ExtHash]uint64 // External children referenced by this node

	flushPrev common.ExtHash // Previous node in the flush-list
	flushNext common.ExtHash // Next node in the flush-list
}

// rlp returns the raw rlp encoded blob of the cached trie node, either directly
// from the cache, or by regenerating it from the collapsed node.
func (n *cachedNode) rlp() []byte {
	if node, ok := n.node.(rawNode); ok {
		return node
	}
	return nodeToBytes(n.node)
}

// obj returns the decoded and expanded trie node, either directly from the cache,
// or by regenerating it from the rlp encoded blob.
func (n *cachedNode) obj(hash common.ExtHash) node {
	if node, ok := n.node.(rawNode); ok {
		return mustDecodeNode(hash[:], node)
	}
	return expandNode(hash[:], n.node)
}

// childs returns all the tracked children of this node, both the implicit ones
// from inside the node as well as the explicit ones from outside the node.
func (n *cachedNode) childs() []common.ExtHash {
	children := make([]common.ExtHash, 0, 16)
	for child := range n.children {
		children = append(children, child)
	}
	if _, ok := n.node.(rawNode); !ok {
		gatherChildren(n.node, &children)
	}
	return children
}
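// As an illustration of the two kinds of children tracked above: explicit
// children live in the reference-counted `children` map, while implicit ones
// are hash references embedded in the collapsed node itself and are recovered
// by gatherChildren. The function below is a hypothetical sketch, not part of
// the original file; the two hashes are made up for the demonstration.
func exampleChilds() {
	implicitChild := common.HexToHash("0x01").ExtendZero()
	explicitChild := common.HexToHash("0x02").ExtendZero()

	n := &cachedNode{
		// A collapsed short node pointing at another node by hash.
		node: &rawShortNode{Key: []byte{0x1}, Val: hashNode(implicitChild[:])},
		// An externally referenced child, e.g. a storage trie root.
		children: map[common.ExtHash]uint64{explicitChild: 1},
	}
	// childs() returns both: the explicit child from the map and the
	// implicit hashNode child gathered out of the collapsed node.
	fmt.Println(len(n.childs())) // 2
}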
// gatherChildren traverses the node hierarchy of a collapsed database node and
// retrieves all the hashNode children.
func gatherChildren(n node, children *[]common.ExtHash) {
	switch n := n.(type) {
	case *rawShortNode:
		gatherChildren(n.Val, children)

	case rawFullNode:
		for i := 0; i < 16; i++ {
			gatherChildren(n[i], children)
		}
	case hashNode:
		*children = append(*children, common.BytesToExtHash(n))

	case valueNode, nil, rawNode:

	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}

// simplifyNode traverses the hierarchy of an expanded memory node and discards
// all the internal caches, returning a node that only contains the raw data.
func simplifyNode(n node) node {
	switch n := n.(type) {
	case *shortNode:
		// Short nodes discard the flags and cascade
		return &rawShortNode{Key: n.Key, Val: simplifyNode(n.Val)}

	case *fullNode:
		// Full nodes discard the flags and cascade
		node := rawFullNode(n.Children)
		for i := 0; i < len(node); i++ {
			if node[i] != nil {
				node[i] = simplifyNode(node[i])
			}
		}
		return node

	case valueNode, hashNode, rawNode:
		return n

	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}

// expandNode traverses the node hierarchy of a collapsed database node and converts
// all fields and keys into expanded memory form.
func expandNode(hash hashNode, n node) node {
	switch n := n.(type) {
	case *rawShortNode:
		// Short nodes need key and child expansion
		return &shortNode{
			Key: compactToHex(n.Key),
			Val: expandNode(nil, n.Val),
			flags: nodeFlag{
				hash: hash,
			},
		}

	case rawFullNode:
		// Full nodes need child expansion
		node := &fullNode{
			flags: nodeFlag{
				hash: hash,
			},
		}
		for i := 0; i < len(node.Children); i++ {
			if n[i] != nil {
				node.Children[i] = expandNode(nil, n[i])
			}
		}
		return node

	case valueNode, hashNode:
		return n

	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}

// NewDatabase creates a new trie database to store ephemeral trie content before
// it's written out to disk or garbage collected.
func NewDatabase(diskDB database.DBManager) *Database {
	return NewDatabaseWithNewCache(diskDB, GetEmptyTrieNodeCacheConfig())
}

// NewDatabaseWithNewCache creates a new trie database to store ephemeral trie content
// before it's written out to disk or garbage collected. It also acts as a read cache
// for nodes loaded from disk.
func NewDatabaseWithNewCache(diskDB database.DBManager, cacheConfig *TrieNodeCacheConfig) *Database {
	trieNodeCache, err := NewTrieNodeCache(cacheConfig)
	if err != nil {
		logger.Error("Invalid trie node cache config", "err", err, "config", cacheConfig)
	}

	return &Database{
		diskDB:              diskDB,
		nodes:               map[common.ExtHash]*cachedNode{{}: {}},
		preimages:           make(map[common.Hash][]byte),
		trieNodeCache:       trieNodeCache,
		trieNodeCacheConfig: cacheConfig,
	}
}
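// A minimal construction sketch (not part of the original file): it assumes a
// database.DBManager `dbm` obtained from the storage layer elsewhere. Passing
// GetEmptyTrieNodeCacheConfig() mirrors what NewDatabase does; a caller that
// wants a differently sized cache would tweak the config before the call.
func exampleNewDatabase(dbm database.DBManager) *Database {
	cfg := GetEmptyTrieNodeCacheConfig()
	// Hypothetical tuning step: LocalCacheSizeMiB is the field referenced by
	// GetTrieNodeLocalCacheByteLimit below, and AutoScaling is documented
	// above as sizing the cache from physical memory.
	cfg.LocalCacheSizeMiB = AutoScaling
	return NewDatabaseWithNewCache(dbm, cfg)
}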
// NewDatabaseWithExistingCache creates a new trie database to store ephemeral trie content
// before it's written out to disk or garbage collected. It also acts as a read cache
// for nodes loaded from disk.
func NewDatabaseWithExistingCache(diskDB database.DBManager, cache TrieNodeCache) *Database {
	return &Database{
		diskDB:        diskDB,
		nodes:         map[common.ExtHash]*cachedNode{{}: {}},
		preimages:     make(map[common.Hash][]byte),
		trieNodeCache: cache,
	}
}

func getTrieNodeCacheSizeMiB() int {
	totalPhysicalMemMiB := float64(memory.TotalMemory() / 1024 / 1024)

	if totalPhysicalMemMiB < 10*1024 {
		return 0
	} else if totalPhysicalMemMiB < 20*1024 {
		return 1 * 1024 // allocate 1GiB for small memory (<20GiB)
	} else if totalPhysicalMemMiB < 30*1024 {
		return 6 * 1024 // allocate 6GiB for medium memory (<30GiB)
	} else {
		return 10 * 1024 // allocate 10GiB for large memory (>=30GiB)
	}
}

// DiskDB retrieves the persistent database backing the trie database.
func (db *Database) DiskDB() database.DBManager {
	return db.diskDB
}

// TrieNodeCache retrieves the trieNodeCache of the trie database.
func (db *Database) TrieNodeCache() TrieNodeCache {
	return db.trieNodeCache
}

// GetTrieNodeCacheConfig returns the configuration of TrieNodeCache.
func (db *Database) GetTrieNodeCacheConfig() *TrieNodeCacheConfig {
	return db.trieNodeCacheConfig
}

// GetTrieNodeLocalCacheByteLimit returns the byte size of the trie node cache.
func (db *Database) GetTrieNodeLocalCacheByteLimit() uint64 {
	return uint64(db.trieNodeCacheConfig.LocalCacheSizeMiB) * 1024 * 1024
}

// RLockGCCachedNode acquires the cachedNode GC lock for reading.
func (db *Database) RLockGCCachedNode() {
	db.gcLock.RLock()
}

// RUnlockGCCachedNode releases the cachedNode GC read lock.
func (db *Database) RUnlockGCCachedNode() {
	db.gcLock.RUnlock()
}

// NodeChildren retrieves the children hashes of the given trie node.
func (db *Database) NodeChildren(hash common.ExtHash) ([]common.ExtHash, error) {
	childrenHash := make([]common.ExtHash, 0, 16)

	if common.EmptyExtHash(hash) {
		return childrenHash, ErrZeroHashNode
	}

	n, _ := db.node(hash)
	if n == nil {
		return childrenHash, nil
	}

	children := make([]node, 0, 16)

	switch n := n.(type) {
	case *shortNode:
		children = []node{n.Val}
	case *fullNode:
		for i := 0; i < 17; i++ {
			if n.Children[i] != nil {
				children = append(children, n.Children[i])
			}
		}
	}

	for _, child := range children {
		n, ok := child.(hashNode)
		if ok {
			hash := common.BytesToExtHash(n)
			childrenHash = append(childrenHash, hash)
		}
	}

	return childrenHash, nil
}
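// NodeChildren enables simple trie traversals without decoding nodes by hand.
// The sketch below is illustrative, not part of the original file: it counts
// the hash-referenced nodes reachable from a root by walking children
// depth-first, assuming a populated *Database `db`.
func exampleCountReachable(db *Database, root common.ExtHash) int {
	count := 0
	var walk func(hash common.ExtHash)
	walk = func(hash common.ExtHash) {
		children, err := db.NodeChildren(hash)
		if err != nil {
			return // e.g. ErrZeroHashNode for an empty hash
		}
		count++
		for _, child := range children {
			walk(child)
		}
	}
	walk(root)
	return count
}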
// insert inserts a collapsed trie node into the memory database.
// The blob size must be specified to allow proper size tracking.
// All nodes inserted by this function will be reference tracked,
// and in theory it should only be used for **trie node** insertion.
func (db *Database) insert(hash common.ExtHash, lenEncoded uint16, node node) {
	// If the node's already cached, skip
	if _, ok := db.nodes[hash]; ok {
		return
	}
	// Create the cached entry for this node
	entry := &cachedNode{
		node:      simplifyNode(node),
		size:      lenEncoded,
		flushPrev: db.newest,
	}
	for _, child := range entry.childs() {
		if c := db.nodes[child]; c != nil {
			c.parents++
		}
	}
	db.nodes[hash] = entry

	// Update the flush-list endpoints
	if common.EmptyExtHash(db.oldest) {
		db.oldest, db.newest = hash, hash
	} else {
		if _, ok := db.nodes[db.newest]; !ok {
			missingNewest := db.newest
			db.newest = db.getLastNodeHashInFlushList()
			db.nodes[db.newest].flushNext = common.ExtHash{}
			logger.Error("Found a newest node for missingNewest", "oldNewest", missingNewest, "newNewest", db.newest)
		}
		db.nodes[db.newest].flushNext, db.newest = hash, hash
	}
	db.nodesSize += common.StorageSize(common.HashLength + entry.size)
}

// insertPreimage writes a new trie node pre-image to the memory database if it's
// yet unknown. The method will make a copy of the slice.
//
// Note, this method assumes that the database's lock is held!
func (db *Database) insertPreimage(hash common.Hash, preimage []byte) {
	if _, ok := db.preimages[hash]; ok {
		return
	}
	db.preimages[hash] = common.CopyBytes(preimage)
	db.preimagesSize += common.StorageSize(common.HashLength + len(preimage))
}

// insertPruningMark writes a new pruning mark to the memory database.
// Note, this method assumes that the database's lock is held!
func (db *Database) insertPruningMark(hash common.ExtHash, blockNum uint64) {
	db.pruningMarks = append(db.pruningMarks, database.PruningMark{
		Number: blockNum,
		Hash:   hash,
	})
}

// getCachedNode finds an encoded node in the trie node cache if enabled.
func (db *Database) getCachedNode(hash common.ExtHash) []byte {
	if db.trieNodeCache != nil {
		if enc := db.trieNodeCache.Get(hash[:]); enc != nil {
			memcacheCleanHitMeter.Mark(1)
			memcacheCleanReadMeter.Mark(int64(len(enc)))
			return enc
		}
	}
	return nil
}

// setCachedNode stores an encoded node to the trie node cache if enabled.
func (db *Database) setCachedNode(hash common.ExtHash, enc []byte) {
	if db.trieNodeCache != nil {
		db.trieNodeCache.Set(hash[:], enc)
		memcacheCleanWriteMeter.Mark(int64(len(enc)))
	}
}

func recordTrieCacheMiss() {
	memcacheCleanMissMeter.Mark(1)
}
// node retrieves a cached trie node from memory, or returns nil if the node
// cannot be found in the memory caches or the persistent database.
func (db *Database) node(hash common.ExtHash) (n node, fromDB bool) {
	// Retrieve the node from the trie node cache if available
	if enc := db.getCachedNode(hash); enc != nil {
		if dec, err := decodeNode(hash[:], enc); err == nil {
			return dec, false
		} else {
			logger.Error("node from cached trie node fails to be decoded!", "err", err)
		}
	}

	// Retrieve the node from the state cache if available
	db.lock.RLock()
	node := db.nodes[hash]
	db.lock.RUnlock()
	if node != nil {
		return node.obj(hash), false
	}

	// Content unavailable in memory, attempt to retrieve from disk
	enc, err := db.diskDB.ReadTrieNode(hash)
	if err != nil || enc == nil {
		return nil, true
	}
	db.setCachedNode(hash, enc)
	recordTrieCacheMiss()
	return mustDecodeNode(hash[:], enc), true
}

// Node retrieves an encoded cached trie node from memory. If it cannot be found
// cached, the method queries the persistent database for the content.
func (db *Database) Node(hash common.ExtHash) ([]byte, error) {
	if common.EmptyExtHash(hash) {
		return nil, ErrZeroHashNode
	}
	// Retrieve the node from the trie node cache if available
	if enc := db.getCachedNode(hash); enc != nil {
		return enc, nil
	}

	// Retrieve the node from cache if available
	db.lock.RLock()
	node := db.nodes[hash]
	db.lock.RUnlock()

	if node != nil {
		return node.rlp(), nil
	}
	// Content unavailable in memory, attempt to retrieve from disk
	enc, err := db.diskDB.ReadTrieNode(hash)
	if err == nil && enc != nil {
		db.setCachedNode(hash, enc)
		recordTrieCacheMiss()
	}
	return enc, err
}

// NodeFromOld retrieves an encoded cached trie node from memory. If it cannot be found
// cached, the method queries the old persistent database for the content.
func (db *Database) NodeFromOld(hash common.ExtHash) ([]byte, error) {
	if common.EmptyExtHash(hash) {
		return nil, ErrZeroHashNode
	}
	// Retrieve the node from the trie node cache if available
	if enc := db.getCachedNode(hash); enc != nil {
		return enc, nil
	}

	// Retrieve the node from cache if available
	db.lock.RLock()
	node := db.nodes[hash]
	db.lock.RUnlock()

	if node != nil {
		return node.rlp(), nil
	}
	// Content unavailable in memory, attempt to retrieve from disk
	enc, err := db.diskDB.ReadTrieNodeFromOld(hash)
	if err == nil && enc != nil {
		db.setCachedNode(hash, enc)
		recordTrieCacheMiss()
	}
	return enc, err
}

// DoesExistCachedNode reports whether the node exists among the cached trie nodes in memory.
func (db *Database) DoesExistCachedNode(hash common.ExtHash) bool {
	// Retrieve the node from cache if available
	db.lock.RLock()
	_, ok := db.nodes[hash]
	db.lock.RUnlock()
	return ok
}

// DoesExistNodeInPersistent reports whether the node exists in the persistent database or its cache.
func (db *Database) DoesExistNodeInPersistent(hash common.ExtHash) bool {
	// Retrieve the node from DB cache if available
	if enc := db.getCachedNode(hash); enc != nil {
		return true
	}

	// Content unavailable in DB cache, attempt to retrieve from disk
	enc, err := db.diskDB.ReadTrieNode(hash)
	if err == nil && enc != nil {
		return true
	}

	return false
}
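// Node resolves through three tiers in order: the clean trie node cache, the
// in-memory dirty set (db.nodes), and finally the persistent database. A
// minimal read sketch (not part of the original file), assuming a *Database
// `db` and a 32-byte root hash:
func exampleReadNode(db *Database, root common.Hash) {
	enc, err := db.Node(root.ExtendZero())
	if err != nil || len(enc) == 0 {
		logger.Error("trie node not found", "hash", root, "err", err)
		return
	}
	logger.Info("read trie node", "bytes", len(enc))
}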
// preimage retrieves a cached trie node pre-image from memory. If it cannot be
// found cached, the method queries the persistent database for the content.
func (db *Database) preimage(hash common.Hash) []byte {
	// Retrieve the node from cache if available
	db.lock.RLock()
	preimage := db.preimages[hash]
	db.lock.RUnlock()

	if preimage != nil {
		return preimage
	}
	// Content unavailable in memory, attempt to retrieve from disk
	return db.diskDB.ReadPreimage(hash)
}

// Nodes retrieves the hashes of all the nodes cached within the memory database.
// This method is extremely expensive and should only be used to validate internal
// states in test code.
func (db *Database) Nodes() []common.ExtHash {
	db.lock.RLock()
	defer db.lock.RUnlock()

	hashes := make([]common.ExtHash, 0, len(db.nodes))
	for hash := range db.nodes {
		if !common.EmptyExtHash(hash) { // Special case for "root" references/nodes
			hashes = append(hashes, hash)
		}
	}
	return hashes
}

// ReferenceRoot adds a new reference from the database metaroot to a state root.
// Use ReferenceRoot to reference a state root; otherwise use Reference.
func (db *Database) ReferenceRoot(root common.Hash) {
	db.lock.Lock()
	defer db.lock.Unlock()

	db.reference(root.ExtendZero(), common.ExtHash{})
}

// Reference adds a new reference from a parent node to a child node.
// This function is used to add a reference between an internal trie node
// and an external node (e.g. a storage trie root); all internal trie nodes
// are referenced together by the database itself.
// Use ReferenceRoot to reference a state root; otherwise use Reference.
func (db *Database) Reference(child common.ExtHash, parent common.ExtHash) {
	db.lock.Lock()
	defer db.lock.Unlock()

	db.reference(child, parent)
}

// reference is the private locked version of Reference.
func (db *Database) reference(child common.ExtHash, parent common.ExtHash) {
	// If the node does not exist, it's a node pulled from disk, skip
	node, ok := db.nodes[child]
	if !ok {
		return
	}
	// If the reference already exists, only duplicate for roots
	if db.nodes[parent].children == nil {
		db.nodes[parent].children = make(map[common.ExtHash]uint64)
	} else if _, ok = db.nodes[parent].children[child]; ok && !common.EmptyExtHash(parent) {
		return
	}
	node.parents++
	db.nodes[parent].children[child]++
}
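// A common pattern is referencing an external root, such as a contract's
// storage trie root, from the account trie node that points at it, so the
// garbage collector keeps both alive together. Illustrative sketch only,
// not part of the original file; the parameters are hypothetical hashes
// produced by trie commits elsewhere.
func exampleReference(db *Database, stateRoot common.Hash, storageRoot, accountNode common.ExtHash) {
	// Anchor the whole state trie at the metaroot.
	db.ReferenceRoot(stateRoot)
	// Tie the storage trie root to the account node that owns it.
	db.Reference(storageRoot, accountNode)
}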
// Dereference removes an existing reference from a state root node.
func (db *Database) Dereference(root common.Hash) {
	// Sanity check to ensure that the meta-root is not removed
	if common.EmptyHash(root) {
		logger.Error("Attempted to dereference the trie cache meta root")
		return
	}

	db.gcLock.Lock()
	defer db.gcLock.Unlock()

	db.lock.Lock()
	defer db.lock.Unlock()

	nodes, storage, start := len(db.nodes), db.nodesSize, time.Now()
	db.dereference(root.ExtendZero(), common.ExtHash{})

	db.gcnodes += uint64(nodes - len(db.nodes))
	db.gcsize += storage - db.nodesSize
	db.gctime += time.Since(start)

	memcacheGCTimeGauge.Update(int64(time.Since(start)))
	memcacheGCSizeMeter.Mark(int64(storage - db.nodesSize))
	memcacheGCNodesMeter.Mark(int64(nodes - len(db.nodes)))

	logger.Debug("Dereferenced trie from memory database", "nodes", nodes-len(db.nodes), "size", storage-db.nodesSize, "time", time.Since(start),
		"gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.nodes), "livesize", db.nodesSize)
}

// dereference is the private locked version of Dereference.
func (db *Database) dereference(child common.ExtHash, parent common.ExtHash) {
	// Dereference the parent-child
	node := db.nodes[parent]

	if node.children != nil && node.children[child] > 0 {
		node.children[child]--
		if node.children[child] == 0 {
			delete(node.children, child)
		}
	}
	// If the child does not exist, it's a previously committed node.
	node, ok := db.nodes[child]
	if !ok {
		return
	}
	// If there are no more references to the child, delete it and cascade
	if node.parents > 0 {
		// This is a special cornercase where a node loaded from disk (i.e. not in the
		// memcache any more) gets reinjected as a new node (short node split into full,
		// then reverted into short), causing a cached node to have no parents. That is
		// no problem in itself, but don't make maxint parents out of it.
		node.parents--
	}
	if node.parents == 0 {
		// Remove the node from the flush-list
		db.removeNodeInFlushList(child)
		// Dereference all children and delete the node
		for _, hash := range node.childs() {
			db.dereference(hash, child)
		}
		delete(db.nodes, child)
		db.nodesSize -= common.StorageSize(common.HashLength + int(node.size))
	}
}
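// State pruning typically keeps a sliding window of recent roots referenced
// and dereferences the root that falls out of the window, letting the cascade
// above reclaim every node only that trie still owned. A hedged sketch, not
// part of the original file; the retention constant and root list are
// hypothetical.
func examplePruneOldRoots(db *Database, recentRoots []common.Hash) []common.Hash {
	const retention = 128 // hypothetical number of recent tries kept in memory
	for len(recentRoots) > retention {
		db.Dereference(recentRoots[0])
		recentRoots = recentRoots[1:]
	}
	return recentRoots
}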
// Cap iteratively flushes old but still referenced trie nodes until the total
// memory usage goes below the given threshold.
func (db *Database) Cap(limit common.StorageSize) error {
	// Create a database batch to flush persistent data out. It is important that
	// outside code doesn't see an inconsistent state (referenced data removed from
	// memory cache during commit but not yet in persistent database). This is ensured
	// by only uncaching existing data when the database write finalizes.
	db.lock.RLock()

	nodes, nodeSize, start := len(db.nodes), db.nodesSize, time.Now()
	preimagesSize := db.preimagesSize

	// db.nodesSize only contains the useful data in the cache, but when reporting
	// the total memory consumption, the maintenance metadata also needs to be
	// counted. For every useful node, we track 2 extra hashes as the flushlist.
	size := db.nodesSize + common.StorageSize((len(db.nodes)-1)*2*common.HashLength)

	// If the preimage cache got large enough, push to disk. If it's still small
	// leave it for later to deduplicate writes.
	flushPreimages := db.preimagesSize > 4*1024*1024
	numPreimages := 0
	if flushPreimages {
		db.diskDB.WritePreimages(0, db.preimages)
		numPreimages = len(db.preimages)
	}
	db.diskDB.WritePruningMarks(db.pruningMarks)
	numPruningMarks := len(db.pruningMarks)

	// Keep committing nodes from the flush-list until we're below allowance
	oldest := db.oldest
	batch := db.diskDB.NewBatch(database.StateTrieDB)
	defer batch.Release()
	for size > limit && !common.EmptyExtHash(oldest) {
		// Fetch the oldest referenced node and push into the batch
		node := db.nodes[oldest]
		enc := node.rlp()
		db.diskDB.PutTrieNodeToBatch(batch, oldest, enc)
		if _, err := database.WriteBatchesOverThreshold(batch); err != nil {
			db.lock.RUnlock()
			return err
		}

		db.setCachedNode(oldest, enc)
		// Iterate to the next flush item, or abort if the size cap was achieved. Size
		// is the total size, including both the useful cached data (hash -> blob), as
		// well as the flushlist metadata (2*hash). When flushing items from the cache,
		// we need to reduce both.
		size -= common.StorageSize(3*common.HashLength + int(node.size))
		oldest = node.flushNext
	}
	// Flush out any remainder data from the last batch
	if _, err := database.WriteBatches(batch); err != nil {
		logger.Error("Failed to write flush list to disk", "err", err)
		db.lock.RUnlock()
		return err
	}

	db.lock.RUnlock()

	// Write successful, clear out the flushed data
	db.lock.Lock()
	defer db.lock.Unlock()

	if flushPreimages {
		db.preimages = make(map[common.Hash][]byte)
		db.preimagesSize = 0
	}
	db.pruningMarks = []database.PruningMark{}

	for db.oldest != oldest {
		node := db.nodes[db.oldest]
		delete(db.nodes, db.oldest)
		db.oldest = node.flushNext

		db.nodesSize -= common.StorageSize(common.HashLength + int(node.size))
	}
	if !common.EmptyExtHash(db.oldest) {
		db.nodes[db.oldest].flushPrev = common.ExtHash{}
	} else {
		db.newest = common.ExtHash{}
	}
	db.flushnodes += uint64(nodes - len(db.nodes))
	db.flushsize += nodeSize - db.nodesSize
	db.flushtime += time.Since(start)

	memcacheFlushTimeGauge.Update(int64(time.Since(start)))
	memcacheFlushSizeGauge.Update(int64(nodeSize - db.nodesSize))
	memcacheFlushNodesGauge.Update(int64(nodes - len(db.nodes)))

	logger.Info("Persisted nodes from memory database by Cap", "nodes", nodes-len(db.nodes),
		"size", nodeSize-db.nodesSize, "preimagesSize", preimagesSize-db.preimagesSize, "time", time.Since(start),
		"flushnodes", db.flushnodes, "flushsize", db.flushsize, "flushtime", db.flushtime, "livenodes", len(db.nodes),
		"livesize", db.nodesSize, "preimages", numPreimages, "pruningMarks", numPruningMarks)
	return nil
}

// commitResult contains the result from concurrent commit calls.
// hash and val are empty if the commitResult indicates the end of
// a concurrentCommit goroutine.
type commitResult struct {
	hash common.ExtHash
	val  []byte
}
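// Cap is the memory-pressure valve: a caller compares the first value returned
// by Size (useful data plus flush-list metadata) against a budget and flushes
// down to it. A hedged sketch, not part of the original file; the budget is
// hypothetical.
func exampleCapIfNeeded(db *Database) error {
	const limit = 4 * 1024 * 1024 * 1024 // hypothetical 4 GiB in-memory budget
	if total, _, _ := db.Size(); total > limit {
		// Flush the oldest referenced nodes until usage drops below the budget.
		return db.Cap(limit)
	}
	return nil
}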
func (db *Database) writeBatchNodes(node common.ExtHash) error {
	rootNode, ok := db.nodes[node]
	if !ok {
		return nil
	}

	// To limit the size of the commitResult channel, we use commitResultChSizeLimit here.
	var resultCh chan commitResult
	if len(db.nodes) > commitResultChSizeLimit {
		resultCh = make(chan commitResult, commitResultChSizeLimit)
	} else {
		resultCh = make(chan commitResult, len(db.nodes))
	}
	numGoRoutines := len(rootNode.childs())
	for i, child := range rootNode.childs() {
		go db.concurrentCommit(child, resultCh, i)
	}

	batch := db.diskDB.NewBatch(database.StateTrieDB)
	defer batch.Release()
	for numGoRoutines > 0 {
		result := <-resultCh
		if common.EmptyExtHash(result.hash) && result.val == nil {
			numGoRoutines--
			continue
		}

		db.diskDB.PutTrieNodeToBatch(batch, result.hash, result.val)
		if _, err := database.WriteBatchesOverThreshold(batch); err != nil {
			return err
		}
	}

	enc := rootNode.rlp()
	db.diskDB.PutTrieNodeToBatch(batch, node, enc)
	if err := batch.Write(); err != nil {
		logger.Error("Failed to write trie to disk", "err", err)
		return err
	}
	db.setCachedNode(node, enc)

	return nil
}

func (db *Database) concurrentCommit(hash common.ExtHash, resultCh chan<- commitResult, childIndex int) {
	logger.Trace("concurrentCommit start", "childIndex", childIndex)
	defer logger.Trace("concurrentCommit end", "childIndex", childIndex)
	db.commit(hash, resultCh)
	resultCh <- commitResult{common.ExtHash{}, nil}
}
// Commit iterates over all the children of a particular node, writes them out
// to disk, forcefully tearing down all references in both directions.
// The root must be a state root.
//
// As a side effect, all pre-images accumulated up to this point are also written.
func (db *Database) Commit(root common.Hash, report bool, blockNum uint64) error {
	hash := root.ExtendZero()
	// Create a database batch to flush persistent data out. It is important that
	// outside code doesn't see an inconsistent state (referenced data removed from
	// memory cache during commit but not yet in persistent database). This is ensured
	// by only uncaching existing data when the database write finalizes.
	db.lock.RLock()

	commitStart := time.Now()
	db.diskDB.WritePreimages(0, db.preimages)
	db.diskDB.WritePruningMarks(db.pruningMarks)
	numPreimages := len(db.preimages)
	numPruningMarks := len(db.pruningMarks)

	// Move the trie itself into the batch, flushing if enough data is accumulated
	numNodes, nodesSize := len(db.nodes), db.nodesSize
	if err := db.writeBatchNodes(hash); err != nil {
		db.lock.RUnlock()
		return err
	}

	db.lock.RUnlock()

	// Write successful, clear out the flushed data
	db.lock.Lock()
	defer db.lock.Unlock()

	db.preimages = make(map[common.Hash][]byte)
	db.preimagesSize = 0
	db.pruningMarks = []database.PruningMark{}

	uncacheStart := time.Now()
	db.uncache(hash)
	commitEnd := time.Now()

	memcacheUncacheTimeGauge.Update(int64(commitEnd.Sub(uncacheStart)))
	memcacheCommitTimeGauge.Update(int64(commitEnd.Sub(commitStart)))
	memcacheCommitSizeMeter.Mark(int64(nodesSize - db.nodesSize))
	memcacheCommitNodesMeter.Mark(int64(numNodes - len(db.nodes)))

	localLogger := logger.Info
	if !report {
		localLogger = logger.Debug
	}
	localLogger("Persisted trie from memory database", "blockNum", blockNum,
		"updated nodes", numNodes-len(db.nodes), "updated nodes size", nodesSize-db.nodesSize,
		"time", commitEnd.Sub(commitStart), "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime,
		"livenodes", len(db.nodes), "livesize", db.nodesSize, "preimages", numPreimages, "pruningMarks", numPruningMarks)

	// Reset the garbage collection statistics
	db.gcnodes, db.gcsize, db.gctime = 0, 0, 0
	db.flushnodes, db.flushsize, db.flushtime = 0, 0, 0
	return nil
}

// commit recursively encodes nodes from the parent down to the child nodes.
func (db *Database) commit(hash common.ExtHash, resultCh chan<- commitResult) {
	node, ok := db.nodes[hash]
	if !ok {
		return
	}
	for _, child := range node.childs() {
		db.commit(child, resultCh)
	}
	enc := node.rlp()
	resultCh <- commitResult{hash, enc}

	db.setCachedNode(hash, enc)
}

// uncache is the post-processing step of a commit operation where the already
// persisted trie is removed from the cache. The reason behind the two-phase
// commit is to ensure consistent data availability while moving from memory
// to disk.
func (db *Database) uncache(hash common.ExtHash) {
	// If the node does not exist, we're done on this path
	node, ok := db.nodes[hash]
	if !ok {
		return
	}
	// Node still exists, remove it from the flush-list
	db.removeNodeInFlushList(hash)
	// Uncache the node's subtries and remove the node itself too
	for _, child := range node.childs() {
		db.uncache(child)
	}
	delete(db.nodes, hash)
	db.nodesSize -= common.StorageSize(common.HashLength + int(node.size))
}
// Size returns the current storage size of the memory cache in front of the
// persistent database layer.
func (db *Database) Size() (common.StorageSize, common.StorageSize, common.StorageSize) {
	db.lock.RLock()
	defer db.lock.RUnlock()

	// db.nodesSize only contains the useful data in the cache, but when reporting
	// the total memory consumption, the maintenance metadata also needs to be
	// counted. For every useful node, we track 2 extra hashes as the flushlist.
	flushlistSize := common.StorageSize((len(db.nodes) - 1) * 2 * common.HashLength)
	return db.nodesSize + flushlistSize, db.nodesSize, db.preimagesSize
}

// verifyIntegrity is a debug method to iterate over the entire trie stored in
// memory and check whether every node is reachable from the meta root. The goal
// is to find any errors that might cause memory leaks and/or trie nodes to go
// missing.
//
// This method is extremely CPU and memory intensive, only use it when you must.
func (db *Database) verifyIntegrity() {
	// Iterate over all the cached nodes and accumulate them into a set
	reachable := map[common.ExtHash]struct{}{{}: {}}

	for child := range db.nodes[common.ExtHash{}].children {
		db.accumulate(child, reachable)
	}
	// Find any unreachable but cached nodes
	unreachable := []string{}
	for hash, node := range db.nodes {
		if _, ok := reachable[hash]; !ok {
			unreachable = append(unreachable, fmt.Sprintf("%x: {Node: %v, Parents: %d, Prev: %x, Next: %x}",
				hash, node.node, node.parents, node.flushPrev, node.flushNext))
		}
	}
	if len(unreachable) != 0 {
		panic(fmt.Sprintf("trie cache memory leak: %v", unreachable))
	}
}

// accumulate iterates over the trie defined by hash and accumulates all the
// cached children found in memory.
func (db *Database) accumulate(hash common.ExtHash, reachable map[common.ExtHash]struct{}) {
	// Mark the node reachable if present in the memory cache
	node, ok := db.nodes[hash]
	if !ok {
		return
	}
	reachable[hash] = struct{}{}

	// Iterate over all the children and accumulate them too
	for _, child := range node.childs() {
		db.accumulate(child, reachable)
	}
}

func (db *Database) removeNodeInFlushList(hash common.ExtHash) {
	node, ok := db.nodes[hash]
	if !ok {
		return
	}

	if hash == db.oldest && hash == db.newest {
		db.oldest = common.ExtHash{}
		db.newest = common.ExtHash{}
	} else if hash == db.oldest {
		db.oldest = node.flushNext
		db.nodes[node.flushNext].flushPrev = common.ExtHash{}
	} else if hash == db.newest {
		db.newest = node.flushPrev
		db.nodes[node.flushPrev].flushNext = common.ExtHash{}
	} else {
		db.nodes[node.flushPrev].flushNext = node.flushNext
		db.nodes[node.flushNext].flushPrev = node.flushPrev
	}
}

func (db *Database) getLastNodeHashInFlushList() common.ExtHash {
	var lastNodeHash common.ExtHash
	nodeHash := db.oldest
	for {
		if _, ok := db.nodes[nodeHash]; ok {
			lastNodeHash = nodeHash
		} else {
			logger.Debug("could not find the next node in the flush list")
			break
		}

		if !common.EmptyExtHash(db.nodes[nodeHash].flushNext) {
			nodeHash = db.nodes[nodeHash].flushNext
		} else {
			logger.Debug("found the last node in the flush list")
			break
		}
	}
	return lastNodeHash
}

// UpdateMetricNodes updates the metric for the number of nodes in Database.nodes.
func (db *Database) UpdateMetricNodes() {
	memcacheNodesGauge.Update(int64(len(db.nodes)))
	if db.trieNodeCache != nil {
		db.trieNodeCache.UpdateStats()
	}
}

var (
	errDisabledTrieNodeCache         = errors.New("trie node cache is disabled, nothing to save to file")
	errSavingTrieNodeCacheInProgress = errors.New("saving trie node cache has been triggered already")
)

// CanSaveTrieNodeCacheToFile reports whether the trie node cache can currently be saved to a file.
func (db *Database) CanSaveTrieNodeCacheToFile() error {
	if db.trieNodeCache == nil {
		return errDisabledTrieNodeCache
	}
	if db.savingTrieNodeCacheTriggered {
		return errSavingTrieNodeCacheInProgress
	}
	return nil
}

// SaveTrieNodeCacheToFile saves the currently cached trie nodes to a file, to be reused when the node restarts.
func (db *Database) SaveTrieNodeCacheToFile(filePath string, concurrency int) {
	db.savingTrieNodeCacheTriggered = true
	start := time.Now()
	logger.Info("start saving cache to file",
		"filePath", filePath, "concurrency", concurrency)
	if err := db.trieNodeCache.SaveToFile(filePath, concurrency); err != nil {
		logger.Error("failed to save cache to file",
			"filePath", filePath, "elapsed", time.Since(start), "err", err)
	} else {
		logger.Info("successfully saved cache to file",
			"filePath", filePath, "elapsed", time.Since(start))
	}
	db.savingTrieNodeCacheTriggered = false
}
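// The two methods above are meant to be used together: check
// CanSaveTrieNodeCacheToFile first, then trigger the long-running save in the
// background. Illustrative sketch only, not part of the original file; the
// file path is hypothetical.
func exampleSaveCacheOnce(db *Database) {
	if err := db.CanSaveTrieNodeCacheToFile(); err != nil {
		logger.Warn("cannot save trie node cache", "err", err)
		return
	}
	// Run asynchronously; SaveTrieNodeCacheToFile blocks until done.
	go db.SaveTrieNodeCacheToFile("/var/kend/trie_cache", 1) // hypothetical path
}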
// SaveCachePeriodically atomically saves the fast cache data to the given dir at the specified interval.
func (db *Database) SaveCachePeriodically(c *TrieNodeCacheConfig, stopCh <-chan struct{}) {
	rand.Seed(time.Now().UnixNano())
	randomVal := 0.5 + rand.Float64()/2.0 // 0.5 <= randomVal < 1.0
	startTime := time.Duration(int(randomVal * float64(c.FastCacheSavePeriod)))
	logger.Info("first periodic cache saving will be triggered", "after", startTime)

	timer := time.NewTimer(startTime)
	defer timer.Stop()

	for {
		select {
		case <-timer.C:
			if err := db.CanSaveTrieNodeCacheToFile(); err != nil {
				logger.Warn("failed to trigger periodic cache saving", "err", err)
				continue
			}
			db.SaveTrieNodeCacheToFile(c.FastCacheFileDir, 1)
			timer.Reset(c.FastCacheSavePeriod)
		case <-stopCh:
			return
		}
	}
}

// NodeInfo is a struct used for collecting trie statistics
type NodeInfo struct {
	Depth    int  // 0 if not a leaf node
	Finished bool // true if the uppermost call is finished
}

// CollectChildrenStats collects the depth of the trie recursively
func (db *Database) CollectChildrenStats(node common.ExtHash, depth int, resultCh chan<- NodeInfo) {
	n, _ := db.node(node)
	if n == nil {
		return
	}
	// retrieve the children of the given node
	childrenNodes, err := db.NodeChildren(node)
	if err != nil {
		logger.Error("failed to retrieve the children nodes",
			"node", node.String(), "err", err)
		return
	}
	// write the depth of the node only if the node is a leaf node, otherwise set 0
	resultDepth := 0
	if len(childrenNodes) == 0 {
		resultDepth = depth
	}
	// send the result to the channel and iterate its children
	resultCh <- NodeInfo{Depth: resultDepth}
	for _, child := range childrenNodes {
		db.CollectChildrenStats(child, depth+1, resultCh)
	}
}
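// CollectChildrenStats streams one NodeInfo per visited node and leaves it to
// the caller to drain the channel and mark completion via Finished. An
// illustrative wrapper (not part of the original file) that computes the
// maximum leaf depth of a trie rooted at `root`; the starting depth of 1 and
// the channel buffer size are arbitrary choices.
func exampleMaxDepth(db *Database, root common.ExtHash) int {
	resultCh := make(chan NodeInfo, 1024)
	go func() {
		db.CollectChildrenStats(root, 1, resultCh)
		resultCh <- NodeInfo{Finished: true} // signal the end of the traversal
	}()

	maxDepth := 0
	for info := range resultCh {
		if info.Finished {
			break
		}
		if info.Depth > maxDepth {
			maxDepth = info.Depth // Depth is non-zero only for leaf nodes
		}
	}
	return maxDepth
}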