// Origin: github.com/rayrapetyan/go-ethereum@v1.8.21/trie/database.go

// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package trie

import (
	"fmt"
	"io"
	"sync"
	"time"

	"github.com/allegro/bigcache"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
	"github.com/ethereum/go-ethereum/rlp"
)

var (
	// Clean cache metrics, marked by the node and Node lookups: one hit or
	// miss per lookup, plus the number of bytes read from / written to the
	// clean cache.
	memcacheCleanHitMeter   = metrics.NewRegisteredMeter("trie/memcache/clean/hit", nil)
	memcacheCleanMissMeter  = metrics.NewRegisteredMeter("trie/memcache/clean/miss", nil)
	memcacheCleanReadMeter  = metrics.NewRegisteredMeter("trie/memcache/clean/read", nil)
	memcacheCleanWriteMeter = metrics.NewRegisteredMeter("trie/memcache/clean/write", nil)

	// Flush metrics, updated at the end of every successful Cap.
	memcacheFlushTimeTimer  = metrics.NewRegisteredResettingTimer("trie/memcache/flush/time", nil)
	memcacheFlushNodesMeter = metrics.NewRegisteredMeter("trie/memcache/flush/nodes", nil)
	memcacheFlushSizeMeter  = metrics.NewRegisteredMeter("trie/memcache/flush/size", nil)

	// Garbage collection metrics, updated by every Dereference.
	memcacheGCTimeTimer  = metrics.NewRegisteredResettingTimer("trie/memcache/gc/time", nil)
	memcacheGCNodesMeter = metrics.NewRegisteredMeter("trie/memcache/gc/nodes", nil)
	memcacheGCSizeMeter  = metrics.NewRegisteredMeter("trie/memcache/gc/size", nil)

	// Commit metrics, updated at the end of every successful Commit.
	memcacheCommitTimeTimer  = metrics.NewRegisteredResettingTimer("trie/memcache/commit/time", nil)
	memcacheCommitNodesMeter = metrics.NewRegisteredMeter("trie/memcache/commit/nodes", nil)
	memcacheCommitSizeMeter  = metrics.NewRegisteredMeter("trie/memcache/commit/size", nil)
)

// secureKeyPrefix is the database key prefix used to store trie node preimages.
var secureKeyPrefix = []byte("secure-key-")

// secureKeyLength is the length of the above prefix + 32byte hash.
const secureKeyLength = 11 + 32

// DatabaseReader wraps the Get and Has method of a backing store for the trie.
type DatabaseReader interface {
	// Get retrieves the value associated with key from the database.
	Get(key []byte) (value []byte, err error)

	// Has retrieves whether a key is present in the database.
	Has(key []byte) (bool, error)
}

// Database is an intermediate write layer between the trie data structures and
// the disk database. The aim is to accumulate trie writes in-memory and only
// periodically flush a couple tries to disk, garbage collecting the remainder.
70 type Database struct { 71 diskdb ethdb.Database // Persistent storage for matured trie nodes 72 73 cleans *bigcache.BigCache // GC friendly memory cache of clean node RLPs 74 dirties map[common.Hash]*cachedNode // Data and references relationships of dirty nodes 75 oldest common.Hash // Oldest tracked node, flush-list head 76 newest common.Hash // Newest tracked node, flush-list tail 77 78 preimages map[common.Hash][]byte // Preimages of nodes from the secure trie 79 seckeybuf [secureKeyLength]byte // Ephemeral buffer for calculating preimage keys 80 81 gctime time.Duration // Time spent on garbage collection since last commit 82 gcnodes uint64 // Nodes garbage collected since last commit 83 gcsize common.StorageSize // Data storage garbage collected since last commit 84 85 flushtime time.Duration // Time spent on data flushing since last commit 86 flushnodes uint64 // Nodes flushed since last commit 87 flushsize common.StorageSize // Data storage flushed since last commit 88 89 dirtiesSize common.StorageSize // Storage size of the dirty node cache (exc. flushlist) 90 preimagesSize common.StorageSize // Storage size of the preimages cache 91 92 lock sync.RWMutex 93 } 94 95 // rawNode is a simple binary blob used to differentiate between collapsed trie 96 // nodes and already encoded RLP binary blobs (while at the same time store them 97 // in the same cache fields). 98 type rawNode []byte 99 100 func (n rawNode) canUnload(uint16, uint16) bool { panic("this should never end up in a live trie") } 101 func (n rawNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") } 102 func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") } 103 104 // rawFullNode represents only the useful data content of a full node, with the 105 // caches and flags stripped out to minimize its data storage. This type honors 106 // the same RLP encoding as the original parent. 
107 type rawFullNode [17]node 108 109 func (n rawFullNode) canUnload(uint16, uint16) bool { panic("this should never end up in a live trie") } 110 func (n rawFullNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") } 111 func (n rawFullNode) fstring(ind string) string { panic("this should never end up in a live trie") } 112 113 func (n rawFullNode) EncodeRLP(w io.Writer) error { 114 var nodes [17]node 115 116 for i, child := range n { 117 if child != nil { 118 nodes[i] = child 119 } else { 120 nodes[i] = nilValueNode 121 } 122 } 123 return rlp.Encode(w, nodes) 124 } 125 126 // rawShortNode represents only the useful data content of a short node, with the 127 // caches and flags stripped out to minimize its data storage. This type honors 128 // the same RLP encoding as the original parent. 129 type rawShortNode struct { 130 Key []byte 131 Val node 132 } 133 134 func (n rawShortNode) canUnload(uint16, uint16) bool { panic("this should never end up in a live trie") } 135 func (n rawShortNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") } 136 func (n rawShortNode) fstring(ind string) string { panic("this should never end up in a live trie") } 137 138 // cachedNode is all the information we know about a single cached node in the 139 // memory database write layer. 140 type cachedNode struct { 141 node node // Cached collapsed trie node, or raw rlp data 142 size uint16 // Byte size of the useful cached data 143 144 parents uint32 // Number of live nodes referencing this one 145 children map[common.Hash]uint16 // External children referenced by this node 146 147 flushPrev common.Hash // Previous node in the flush-list 148 flushNext common.Hash // Next node in the flush-list 149 } 150 151 // rlp returns the raw rlp encoded blob of the cached node, either directly from 152 // the cache, or by regenerating it from the collapsed node. 
153 func (n *cachedNode) rlp() []byte { 154 if node, ok := n.node.(rawNode); ok { 155 return node 156 } 157 blob, err := rlp.EncodeToBytes(n.node) 158 if err != nil { 159 panic(err) 160 } 161 return blob 162 } 163 164 // obj returns the decoded and expanded trie node, either directly from the cache, 165 // or by regenerating it from the rlp encoded blob. 166 func (n *cachedNode) obj(hash common.Hash, cachegen uint16) node { 167 if node, ok := n.node.(rawNode); ok { 168 return mustDecodeNode(hash[:], node, cachegen) 169 } 170 return expandNode(hash[:], n.node, cachegen) 171 } 172 173 // childs returns all the tracked children of this node, both the implicit ones 174 // from inside the node as well as the explicit ones from outside the node. 175 func (n *cachedNode) childs() []common.Hash { 176 children := make([]common.Hash, 0, 16) 177 for child := range n.children { 178 children = append(children, child) 179 } 180 if _, ok := n.node.(rawNode); !ok { 181 gatherChildren(n.node, &children) 182 } 183 return children 184 } 185 186 // gatherChildren traverses the node hierarchy of a collapsed storage node and 187 // retrieves all the hashnode children. 188 func gatherChildren(n node, children *[]common.Hash) { 189 switch n := n.(type) { 190 case *rawShortNode: 191 gatherChildren(n.Val, children) 192 193 case rawFullNode: 194 for i := 0; i < 16; i++ { 195 gatherChildren(n[i], children) 196 } 197 case hashNode: 198 *children = append(*children, common.BytesToHash(n)) 199 200 case valueNode, nil: 201 202 default: 203 panic(fmt.Sprintf("unknown node type: %T", n)) 204 } 205 } 206 207 // simplifyNode traverses the hierarchy of an expanded memory node and discards 208 // all the internal caches, returning a node that only contains the raw data. 
209 func simplifyNode(n node) node { 210 switch n := n.(type) { 211 case *shortNode: 212 // Short nodes discard the flags and cascade 213 return &rawShortNode{Key: n.Key, Val: simplifyNode(n.Val)} 214 215 case *fullNode: 216 // Full nodes discard the flags and cascade 217 node := rawFullNode(n.Children) 218 for i := 0; i < len(node); i++ { 219 if node[i] != nil { 220 node[i] = simplifyNode(node[i]) 221 } 222 } 223 return node 224 225 case valueNode, hashNode, rawNode: 226 return n 227 228 default: 229 panic(fmt.Sprintf("unknown node type: %T", n)) 230 } 231 } 232 233 // expandNode traverses the node hierarchy of a collapsed storage node and converts 234 // all fields and keys into expanded memory form. 235 func expandNode(hash hashNode, n node, cachegen uint16) node { 236 switch n := n.(type) { 237 case *rawShortNode: 238 // Short nodes need key and child expansion 239 return &shortNode{ 240 Key: compactToHex(n.Key), 241 Val: expandNode(nil, n.Val, cachegen), 242 flags: nodeFlag{ 243 hash: hash, 244 gen: cachegen, 245 }, 246 } 247 248 case rawFullNode: 249 // Full nodes need child expansion 250 node := &fullNode{ 251 flags: nodeFlag{ 252 hash: hash, 253 gen: cachegen, 254 }, 255 } 256 for i := 0; i < len(node.Children); i++ { 257 if n[i] != nil { 258 node.Children[i] = expandNode(nil, n[i], cachegen) 259 } 260 } 261 return node 262 263 case valueNode, hashNode: 264 return n 265 266 default: 267 panic(fmt.Sprintf("unknown node type: %T", n)) 268 } 269 } 270 271 // NewDatabase creates a new trie database to store ephemeral trie content before 272 // its written out to disk or garbage collected. No read cache is created, so all 273 // data retrievals will hit the underlying disk database. 274 func NewDatabase(diskdb ethdb.Database) *Database { 275 return NewDatabaseWithCache(diskdb, 0) 276 } 277 278 // NewDatabaseWithCache creates a new trie database to store ephemeral trie content 279 // before its written out to disk or garbage collected. 
It also acts as a read cache 280 // for nodes loaded from disk. 281 func NewDatabaseWithCache(diskdb ethdb.Database, cache int) *Database { 282 var cleans *bigcache.BigCache 283 if cache > 0 { 284 cleans, _ = bigcache.NewBigCache(bigcache.Config{ 285 Shards: 1024, 286 LifeWindow: time.Hour, 287 MaxEntriesInWindow: cache * 1024, 288 MaxEntrySize: 512, 289 HardMaxCacheSize: cache, 290 }) 291 } 292 return &Database{ 293 diskdb: diskdb, 294 cleans: cleans, 295 dirties: map[common.Hash]*cachedNode{{}: {}}, 296 preimages: make(map[common.Hash][]byte), 297 } 298 } 299 300 // DiskDB retrieves the persistent storage backing the trie database. 301 func (db *Database) DiskDB() DatabaseReader { 302 return db.diskdb 303 } 304 305 // InsertBlob writes a new reference tracked blob to the memory database if it's 306 // yet unknown. This method should only be used for non-trie nodes that require 307 // reference counting, since trie nodes are garbage collected directly through 308 // their embedded children. 309 func (db *Database) InsertBlob(hash common.Hash, blob []byte) { 310 db.lock.Lock() 311 defer db.lock.Unlock() 312 313 db.insert(hash, blob, rawNode(blob)) 314 } 315 316 // insert inserts a collapsed trie node into the memory database. This method is 317 // a more generic version of InsertBlob, supporting both raw blob insertions as 318 // well ex trie node insertions. The blob must always be specified to allow proper 319 // size tracking. 
320 func (db *Database) insert(hash common.Hash, blob []byte, node node) { 321 // If the node's already cached, skip 322 if _, ok := db.dirties[hash]; ok { 323 return 324 } 325 // Create the cached entry for this node 326 entry := &cachedNode{ 327 node: simplifyNode(node), 328 size: uint16(len(blob)), 329 flushPrev: db.newest, 330 } 331 for _, child := range entry.childs() { 332 if c := db.dirties[child]; c != nil { 333 c.parents++ 334 } 335 } 336 db.dirties[hash] = entry 337 338 // Update the flush-list endpoints 339 if db.oldest == (common.Hash{}) { 340 db.oldest, db.newest = hash, hash 341 } else { 342 db.dirties[db.newest].flushNext, db.newest = hash, hash 343 } 344 db.dirtiesSize += common.StorageSize(common.HashLength + entry.size) 345 } 346 347 // insertPreimage writes a new trie node pre-image to the memory database if it's 348 // yet unknown. The method will make a copy of the slice. 349 // 350 // Note, this method assumes that the database's lock is held! 351 func (db *Database) insertPreimage(hash common.Hash, preimage []byte) { 352 if _, ok := db.preimages[hash]; ok { 353 return 354 } 355 db.preimages[hash] = common.CopyBytes(preimage) 356 db.preimagesSize += common.StorageSize(common.HashLength + len(preimage)) 357 } 358 359 // node retrieves a cached trie node from memory, or returns nil if none can be 360 // found in the memory cache. 
361 func (db *Database) node(hash common.Hash, cachegen uint16) node { 362 // Retrieve the node from the clean cache if available 363 if db.cleans != nil { 364 if enc, err := db.cleans.Get(string(hash[:])); err == nil && enc != nil { 365 memcacheCleanHitMeter.Mark(1) 366 memcacheCleanReadMeter.Mark(int64(len(enc))) 367 return mustDecodeNode(hash[:], enc, cachegen) 368 } 369 } 370 // Retrieve the node from the dirty cache if available 371 db.lock.RLock() 372 dirty := db.dirties[hash] 373 db.lock.RUnlock() 374 375 if dirty != nil { 376 return dirty.obj(hash, cachegen) 377 } 378 // Content unavailable in memory, attempt to retrieve from disk 379 enc, err := db.diskdb.Get(hash[:]) 380 if err != nil || enc == nil { 381 return nil 382 } 383 if db.cleans != nil { 384 db.cleans.Set(string(hash[:]), enc) 385 memcacheCleanMissMeter.Mark(1) 386 memcacheCleanWriteMeter.Mark(int64(len(enc))) 387 } 388 return mustDecodeNode(hash[:], enc, cachegen) 389 } 390 391 // Node retrieves an encoded cached trie node from memory. If it cannot be found 392 // cached, the method queries the persistent database for the content. 
393 func (db *Database) Node(hash common.Hash) ([]byte, error) { 394 // Retrieve the node from the clean cache if available 395 if db.cleans != nil { 396 if enc, err := db.cleans.Get(string(hash[:])); err == nil && enc != nil { 397 memcacheCleanHitMeter.Mark(1) 398 memcacheCleanReadMeter.Mark(int64(len(enc))) 399 return enc, nil 400 } 401 } 402 // Retrieve the node from the dirty cache if available 403 db.lock.RLock() 404 dirty := db.dirties[hash] 405 db.lock.RUnlock() 406 407 if dirty != nil { 408 return dirty.rlp(), nil 409 } 410 // Content unavailable in memory, attempt to retrieve from disk 411 enc, err := db.diskdb.Get(hash[:]) 412 if err == nil && enc != nil { 413 if db.cleans != nil { 414 db.cleans.Set(string(hash[:]), enc) 415 memcacheCleanMissMeter.Mark(1) 416 memcacheCleanWriteMeter.Mark(int64(len(enc))) 417 } 418 } 419 return enc, err 420 } 421 422 // preimage retrieves a cached trie node pre-image from memory. If it cannot be 423 // found cached, the method queries the persistent database for the content. 424 func (db *Database) preimage(hash common.Hash) ([]byte, error) { 425 // Retrieve the node from cache if available 426 db.lock.RLock() 427 preimage := db.preimages[hash] 428 db.lock.RUnlock() 429 430 if preimage != nil { 431 return preimage, nil 432 } 433 // Content unavailable in memory, attempt to retrieve from disk 434 return db.diskdb.Get(db.secureKey(hash[:])) 435 } 436 437 // secureKey returns the database key for the preimage of key, as an ephemeral 438 // buffer. The caller must not hold onto the return value because it will become 439 // invalid on the next call. 440 func (db *Database) secureKey(key []byte) []byte { 441 buf := append(db.seckeybuf[:0], secureKeyPrefix...) 442 buf = append(buf, key...) 443 return buf 444 } 445 446 // Nodes retrieves the hashes of all the nodes cached within the memory database. 447 // This method is extremely expensive and should only be used to validate internal 448 // states in test code. 
449 func (db *Database) Nodes() []common.Hash { 450 db.lock.RLock() 451 defer db.lock.RUnlock() 452 453 var hashes = make([]common.Hash, 0, len(db.dirties)) 454 for hash := range db.dirties { 455 if hash != (common.Hash{}) { // Special case for "root" references/nodes 456 hashes = append(hashes, hash) 457 } 458 } 459 return hashes 460 } 461 462 // Reference adds a new reference from a parent node to a child node. 463 func (db *Database) Reference(child common.Hash, parent common.Hash) { 464 db.lock.RLock() 465 defer db.lock.RUnlock() 466 467 db.reference(child, parent) 468 } 469 470 // reference is the private locked version of Reference. 471 func (db *Database) reference(child common.Hash, parent common.Hash) { 472 // If the node does not exist, it's a node pulled from disk, skip 473 node, ok := db.dirties[child] 474 if !ok { 475 return 476 } 477 // If the reference already exists, only duplicate for roots 478 if db.dirties[parent].children == nil { 479 db.dirties[parent].children = make(map[common.Hash]uint16) 480 } else if _, ok = db.dirties[parent].children[child]; ok && parent != (common.Hash{}) { 481 return 482 } 483 node.parents++ 484 db.dirties[parent].children[child]++ 485 } 486 487 // Dereference removes an existing reference from a root node. 
488 func (db *Database) Dereference(root common.Hash) { 489 // Sanity check to ensure that the meta-root is not removed 490 if root == (common.Hash{}) { 491 log.Error("Attempted to dereference the trie cache meta root") 492 return 493 } 494 db.lock.Lock() 495 defer db.lock.Unlock() 496 497 nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now() 498 db.dereference(root, common.Hash{}) 499 500 db.gcnodes += uint64(nodes - len(db.dirties)) 501 db.gcsize += storage - db.dirtiesSize 502 db.gctime += time.Since(start) 503 504 memcacheGCTimeTimer.Update(time.Since(start)) 505 memcacheGCSizeMeter.Mark(int64(storage - db.dirtiesSize)) 506 memcacheGCNodesMeter.Mark(int64(nodes - len(db.dirties))) 507 508 log.Debug("Dereferenced trie from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start), 509 "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize) 510 } 511 512 // dereference is the private locked version of Dereference. 513 func (db *Database) dereference(child common.Hash, parent common.Hash) { 514 // Dereference the parent-child 515 node := db.dirties[parent] 516 517 if node.children != nil && node.children[child] > 0 { 518 node.children[child]-- 519 if node.children[child] == 0 { 520 delete(node.children, child) 521 } 522 } 523 // If the child does not exist, it's a previously committed node. 524 node, ok := db.dirties[child] 525 if !ok { 526 return 527 } 528 // If there are no more references to the child, delete it and cascade 529 if node.parents > 0 { 530 // This is a special cornercase where a node loaded from disk (i.e. not in the 531 // memcache any more) gets reinjected as a new node (short node split into full, 532 // then reverted into short), causing a cached node to have no parents. That is 533 // no problem in itself, but don't make maxint parents out of it. 
534 node.parents-- 535 } 536 if node.parents == 0 { 537 // Remove the node from the flush-list 538 switch child { 539 case db.oldest: 540 db.oldest = node.flushNext 541 db.dirties[node.flushNext].flushPrev = common.Hash{} 542 case db.newest: 543 db.newest = node.flushPrev 544 db.dirties[node.flushPrev].flushNext = common.Hash{} 545 default: 546 db.dirties[node.flushPrev].flushNext = node.flushNext 547 db.dirties[node.flushNext].flushPrev = node.flushPrev 548 } 549 // Dereference all children and delete the node 550 for _, hash := range node.childs() { 551 db.dereference(hash, child) 552 } 553 delete(db.dirties, child) 554 db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) 555 } 556 } 557 558 // Cap iteratively flushes old but still referenced trie nodes until the total 559 // memory usage goes below the given threshold. 560 func (db *Database) Cap(limit common.StorageSize) error { 561 // Create a database batch to flush persistent data out. It is important that 562 // outside code doesn't see an inconsistent state (referenced data removed from 563 // memory cache during commit but not yet in persistent storage). This is ensured 564 // by only uncaching existing data when the database write finalizes. 565 db.lock.RLock() 566 567 nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now() 568 batch := db.diskdb.NewBatch() 569 570 // db.dirtiesSize only contains the useful data in the cache, but when reporting 571 // the total memory consumption, the maintenance metadata is also needed to be 572 // counted. For every useful node, we track 2 extra hashes as the flushlist. 573 size := db.dirtiesSize + common.StorageSize((len(db.dirties)-1)*2*common.HashLength) 574 575 // If the preimage cache got large enough, push to disk. If it's still small 576 // leave for later to deduplicate writes. 
577 flushPreimages := db.preimagesSize > 4*1024*1024 578 if flushPreimages { 579 for hash, preimage := range db.preimages { 580 if err := batch.Put(db.secureKey(hash[:]), preimage); err != nil { 581 log.Error("Failed to commit preimage from trie database", "err", err) 582 db.lock.RUnlock() 583 return err 584 } 585 if batch.ValueSize() > ethdb.IdealBatchSize { 586 if err := batch.Write(); err != nil { 587 db.lock.RUnlock() 588 return err 589 } 590 batch.Reset() 591 } 592 } 593 } 594 // Keep committing nodes from the flush-list until we're below allowance 595 oldest := db.oldest 596 for size > limit && oldest != (common.Hash{}) { 597 // Fetch the oldest referenced node and push into the batch 598 node := db.dirties[oldest] 599 if err := batch.Put(oldest[:], node.rlp()); err != nil { 600 db.lock.RUnlock() 601 return err 602 } 603 // If we exceeded the ideal batch size, commit and reset 604 if batch.ValueSize() >= ethdb.IdealBatchSize { 605 if err := batch.Write(); err != nil { 606 log.Error("Failed to write flush list to disk", "err", err) 607 db.lock.RUnlock() 608 return err 609 } 610 batch.Reset() 611 } 612 // Iterate to the next flush item, or abort if the size cap was achieved. Size 613 // is the total size, including both the useful cached data (hash -> blob), as 614 // well as the flushlist metadata (2*hash). When flushing items from the cache, 615 // we need to reduce both. 
616 size -= common.StorageSize(3*common.HashLength + int(node.size)) 617 oldest = node.flushNext 618 } 619 // Flush out any remainder data from the last batch 620 if err := batch.Write(); err != nil { 621 log.Error("Failed to write flush list to disk", "err", err) 622 db.lock.RUnlock() 623 return err 624 } 625 db.lock.RUnlock() 626 627 // Write successful, clear out the flushed data 628 db.lock.Lock() 629 defer db.lock.Unlock() 630 631 if flushPreimages { 632 db.preimages = make(map[common.Hash][]byte) 633 db.preimagesSize = 0 634 } 635 for db.oldest != oldest { 636 node := db.dirties[db.oldest] 637 delete(db.dirties, db.oldest) 638 db.oldest = node.flushNext 639 640 db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) 641 } 642 if db.oldest != (common.Hash{}) { 643 db.dirties[db.oldest].flushPrev = common.Hash{} 644 } 645 db.flushnodes += uint64(nodes - len(db.dirties)) 646 db.flushsize += storage - db.dirtiesSize 647 db.flushtime += time.Since(start) 648 649 memcacheFlushTimeTimer.Update(time.Since(start)) 650 memcacheFlushSizeMeter.Mark(int64(storage - db.dirtiesSize)) 651 memcacheFlushNodesMeter.Mark(int64(nodes - len(db.dirties))) 652 653 log.Debug("Persisted nodes from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start), 654 "flushnodes", db.flushnodes, "flushsize", db.flushsize, "flushtime", db.flushtime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize) 655 656 return nil 657 } 658 659 // Commit iterates over all the children of a particular node, writes them out 660 // to disk, forcefully tearing down all references in both directions. 661 // 662 // As a side effect, all pre-images accumulated up to this point are also written. 663 func (db *Database) Commit(node common.Hash, report bool) error { 664 // Create a database batch to flush persistent data out. 
It is important that 665 // outside code doesn't see an inconsistent state (referenced data removed from 666 // memory cache during commit but not yet in persistent storage). This is ensured 667 // by only uncaching existing data when the database write finalizes. 668 db.lock.RLock() 669 670 start := time.Now() 671 batch := db.diskdb.NewBatch() 672 673 // Move all of the accumulated preimages into a write batch 674 for hash, preimage := range db.preimages { 675 if err := batch.Put(db.secureKey(hash[:]), preimage); err != nil { 676 log.Error("Failed to commit preimage from trie database", "err", err) 677 db.lock.RUnlock() 678 return err 679 } 680 if batch.ValueSize() > ethdb.IdealBatchSize { 681 if err := batch.Write(); err != nil { 682 return err 683 } 684 batch.Reset() 685 } 686 } 687 // Move the trie itself into the batch, flushing if enough data is accumulated 688 nodes, storage := len(db.dirties), db.dirtiesSize 689 if err := db.commit(node, batch); err != nil { 690 log.Error("Failed to commit trie from trie database", "err", err) 691 db.lock.RUnlock() 692 return err 693 } 694 // Write batch ready, unlock for readers during persistence 695 if err := batch.Write(); err != nil { 696 log.Error("Failed to write trie to disk", "err", err) 697 db.lock.RUnlock() 698 return err 699 } 700 db.lock.RUnlock() 701 702 // Write successful, clear out the flushed data 703 db.lock.Lock() 704 defer db.lock.Unlock() 705 706 db.preimages = make(map[common.Hash][]byte) 707 db.preimagesSize = 0 708 709 db.uncache(node) 710 711 memcacheCommitTimeTimer.Update(time.Since(start)) 712 memcacheCommitSizeMeter.Mark(int64(storage - db.dirtiesSize)) 713 memcacheCommitNodesMeter.Mark(int64(nodes - len(db.dirties))) 714 715 logger := log.Info 716 if !report { 717 logger = log.Debug 718 } 719 logger("Persisted trie from memory database", "nodes", nodes-len(db.dirties)+int(db.flushnodes), "size", storage-db.dirtiesSize+db.flushsize, "time", time.Since(start)+db.flushtime, 720 "gcnodes", 
db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize) 721 722 // Reset the garbage collection statistics 723 db.gcnodes, db.gcsize, db.gctime = 0, 0, 0 724 db.flushnodes, db.flushsize, db.flushtime = 0, 0, 0 725 726 return nil 727 } 728 729 // commit is the private locked version of Commit. 730 func (db *Database) commit(hash common.Hash, batch ethdb.Batch) error { 731 // If the node does not exist, it's a previously committed node 732 node, ok := db.dirties[hash] 733 if !ok { 734 return nil 735 } 736 for _, child := range node.childs() { 737 if err := db.commit(child, batch); err != nil { 738 return err 739 } 740 } 741 if err := batch.Put(hash[:], node.rlp()); err != nil { 742 return err 743 } 744 // If we've reached an optimal batch size, commit and start over 745 if batch.ValueSize() >= ethdb.IdealBatchSize { 746 if err := batch.Write(); err != nil { 747 return err 748 } 749 batch.Reset() 750 } 751 return nil 752 } 753 754 // uncache is the post-processing step of a commit operation where the already 755 // persisted trie is removed from the cache. The reason behind the two-phase 756 // commit is to ensure consistent data availability while moving from memory 757 // to disk. 
758 func (db *Database) uncache(hash common.Hash) { 759 // If the node does not exist, we're done on this path 760 node, ok := db.dirties[hash] 761 if !ok { 762 return 763 } 764 // Node still exists, remove it from the flush-list 765 switch hash { 766 case db.oldest: 767 db.oldest = node.flushNext 768 db.dirties[node.flushNext].flushPrev = common.Hash{} 769 case db.newest: 770 db.newest = node.flushPrev 771 db.dirties[node.flushPrev].flushNext = common.Hash{} 772 default: 773 db.dirties[node.flushPrev].flushNext = node.flushNext 774 db.dirties[node.flushNext].flushPrev = node.flushPrev 775 } 776 // Uncache the node's subtries and remove the node itself too 777 for _, child := range node.childs() { 778 db.uncache(child) 779 } 780 delete(db.dirties, hash) 781 db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size)) 782 } 783 784 // Size returns the current storage size of the memory cache in front of the 785 // persistent database layer. 786 func (db *Database) Size() (common.StorageSize, common.StorageSize) { 787 db.lock.RLock() 788 defer db.lock.RUnlock() 789 790 // db.dirtiesSize only contains the useful data in the cache, but when reporting 791 // the total memory consumption, the maintenance metadata is also needed to be 792 // counted. For every useful node, we track 2 extra hashes as the flushlist. 793 var flushlistSize = common.StorageSize((len(db.dirties) - 1) * 2 * common.HashLength) 794 return db.dirtiesSize + flushlistSize, db.preimagesSize 795 } 796 797 // verifyIntegrity is a debug method to iterate over the entire trie stored in 798 // memory and check whether every node is reachable from the meta root. The goal 799 // is to find any errors that might cause memory leaks and or trie nodes to go 800 // missing. 801 // 802 // This method is extremely CPU and memory intensive, only use when must. 
803 func (db *Database) verifyIntegrity() { 804 // Iterate over all the cached nodes and accumulate them into a set 805 reachable := map[common.Hash]struct{}{{}: {}} 806 807 for child := range db.dirties[common.Hash{}].children { 808 db.accumulate(child, reachable) 809 } 810 // Find any unreachable but cached nodes 811 unreachable := []string{} 812 for hash, node := range db.dirties { 813 if _, ok := reachable[hash]; !ok { 814 unreachable = append(unreachable, fmt.Sprintf("%x: {Node: %v, Parents: %d, Prev: %x, Next: %x}", 815 hash, node.node, node.parents, node.flushPrev, node.flushNext)) 816 } 817 } 818 if len(unreachable) != 0 { 819 panic(fmt.Sprintf("trie cache memory leak: %v", unreachable)) 820 } 821 } 822 823 // accumulate iterates over the trie defined by hash and accumulates all the 824 // cached children found in memory. 825 func (db *Database) accumulate(hash common.Hash, reachable map[common.Hash]struct{}) { 826 // Mark the node reachable if present in the memory cache 827 node, ok := db.dirties[hash] 828 if !ok { 829 return 830 } 831 reachable[hash] = struct{}{} 832 833 // Iterate over all the children and accumulate them too 834 for _, child := range node.childs() { 835 db.accumulate(child, reachable) 836 } 837 }