github.com/chain5j/chain5j-pkg@v1.0.7/collection/trees/tree/database.go

// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package tree

import (
    "errors"
    "fmt"
    "io"
    "reflect"
    "sync"
    "time"

    "github.com/VictoriaMetrics/fastcache"
    "github.com/chain5j/chain5j-pkg/codec/rlp"
    "github.com/chain5j/chain5j-pkg/database/kvstore"
    "github.com/chain5j/chain5j-pkg/types"
    "github.com/chain5j/chain5j-pkg/util/hexutil"
)

// secureKeyPrefix is the database key prefix used to store trie node preimages.
var secureKeyPrefix = []byte("secure-key-")

// secureKeyLength is the length of the above prefix + 32-byte hash.
const secureKeyLength = 11 + 32

// Database is an intermediate write layer between the trie data structures and
// the disk database. The aim is to accumulate trie writes in-memory and only
// periodically flush a couple tries to disk, garbage collecting the remainder.
//
// Note, the trie Database is **not** thread safe in its mutations, but it **is**
// thread safe in providing individual, independent node access. The rationale
// behind this split design is to provide read access to RPC handlers and sync
// servers even while the trie is executing expensive garbage collection.
type Database struct {
    diskdb kvstore.KeyValueStore // Persistent storage for matured trie nodes

    cleans  *fastcache.Cache           // GC friendly memory cache of clean node RLPs
    dirties map[types.Hash]*cachedNode // Data and references relationships of dirty nodes
    oldest  types.Hash                 // Oldest tracked node, flush-list head
    newest  types.Hash                 // Newest tracked node, flush-list tail

    preimages map[types.Hash][]byte // Preimages of nodes from the secure trie
    seckeybuf [secureKeyLength]byte // Ephemeral buffer for calculating preimage keys

    gctime  time.Duration     // Time spent on garbage collection since last commit
    gcnodes uint64            // Nodes garbage collected since last commit
    gcsize  types.StorageSize // Data storage garbage collected since last commit

    flushtime  time.Duration     // Time spent on data flushing since last commit
    flushnodes uint64            // Nodes flushed since last commit
    flushsize  types.StorageSize // Data storage flushed since last commit

    dirtiesSize   types.StorageSize // Storage size of the dirty node cache (exc. metadata)
    childrenSize  types.StorageSize // Storage size of the external children tracking
    preimagesSize types.StorageSize // Storage size of the preimages cache

    lock sync.RWMutex
}
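// exampleSecureKeyLayout is an illustrative sketch, not part of the original
// file: it shows how a preimage database key is laid out, namely the 11-byte
// "secure-key-" prefix followed by the 32-byte node hash, which is exactly
// what secureKeyLength above accounts for. The function name is hypothetical.
func exampleSecureKeyLayout(hash types.Hash) []byte {
    key := make([]byte, 0, secureKeyLength)
    key = append(key, secureKeyPrefix...) // 11-byte prefix
    key = append(key, hash[:]...)         // 32-byte hash
    return key                            // len(key) == secureKeyLength
}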
// rawNode is a simple binary blob used to differentiate between collapsed trie
// nodes and already encoded RLP binary blobs (while at the same time store them
// in the same cache fields).
type rawNode []byte

func (n rawNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") }

// rawFullNode represents only the useful data content of a full node, with the
// caches and flags stripped out to minimize its data storage. This type honors
// the same RLP encoding as the original parent.
type rawFullNode [17]node

func (n rawFullNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawFullNode) fstring(ind string) string { panic("this should never end up in a live trie") }

func (n rawFullNode) EncodeRLP(w io.Writer) error {
    var nodes [17]node

    for i, child := range n {
        if child != nil {
            nodes[i] = child
        } else {
            nodes[i] = nilValueNode
        }
    }
    return rlp.Encode(w, nodes)
}

// rawShortNode represents only the useful data content of a short node, with the
// caches and flags stripped out to minimize its data storage. This type honors
// the same RLP encoding as the original parent.
type rawShortNode struct {
    Key []byte
    Val node
}

func (n rawShortNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
func (n rawShortNode) fstring(ind string) string { panic("this should never end up in a live trie") }

// cachedNode is all the information we know about a single cached node in the
// memory database write layer.
type cachedNode struct {
    node node   // Cached collapsed trie node, or raw rlp data
    size uint16 // Byte size of the useful cached data

    parents  uint32                // Number of live nodes referencing this one
    children map[types.Hash]uint16 // External children referenced by this node

    flushPrev types.Hash // Previous node in the flush-list
    flushNext types.Hash // Next node in the flush-list
}

// cachedNodeSize is the raw size of a cachedNode data structure without any
// node data included. It's an approximate size, but should be a lot better
// than not counting them.
var cachedNodeSize = int(reflect.TypeOf(cachedNode{}).Size())

// cachedNodeChildrenSize is the raw size of an initialized but empty external
// reference map.
const cachedNodeChildrenSize = 48

// rlp returns the raw rlp encoded blob of the cached node, either directly from
// the cache, or by regenerating it from the collapsed node.
func (n *cachedNode) rlp() []byte {
    if node, ok := n.node.(rawNode); ok {
        return node
    }
    blob, err := rlp.EncodeToBytes(n.node)
    if err != nil {
        panic(err)
    }
    return blob
}

// obj returns the decoded and expanded trie node, either directly from the cache,
// or by regenerating it from the rlp encoded blob.
func (n *cachedNode) obj(hash types.Hash) node {
    if node, ok := n.node.(rawNode); ok {
        return mustDecodeNode(hash[:], node)
    }
    return expandNode(hash[:], n.node)
}
// childs returns all the tracked children of this node, both the implicit ones
// from inside the node as well as the explicit ones from outside the node.
func (n *cachedNode) childs() []types.Hash {
    children := make([]types.Hash, 0, 16)
    for child := range n.children {
        children = append(children, child)
    }
    if _, ok := n.node.(rawNode); !ok {
        gatherChildren(n.node, &children)
    }
    return children
}

// gatherChildren traverses the node hierarchy of a collapsed storage node and
// retrieves all the hashnode children.
func gatherChildren(n node, children *[]types.Hash) {
    switch n := n.(type) {
    case *rawShortNode:
        gatherChildren(n.Val, children)

    case rawFullNode:
        for i := 0; i < 16; i++ {
            gatherChildren(n[i], children)
        }
    case hashNode:
        *children = append(*children, types.BytesToHash(n))

    case valueNode, nil:

    default:
        panic(fmt.Sprintf("unknown node type: %T", n))
    }
}

// simplifyNode traverses the hierarchy of an expanded memory node and discards
// all the internal caches, returning a node that only contains the raw data.
func simplifyNode(n node) node {
    switch n := n.(type) {
    case *shortNode:
        // Short nodes discard the flags and cascade
        return &rawShortNode{Key: n.Key, Val: simplifyNode(n.Val)}

    case *fullNode:
        // Full nodes discard the flags and cascade
        node := rawFullNode(n.Children)
        for i := 0; i < len(node); i++ {
            if node[i] != nil {
                node[i] = simplifyNode(node[i])
            }
        }
        return node

    case valueNode, hashNode, rawNode:
        return n

    default:
        panic(fmt.Sprintf("unknown node type: %T", n))
    }
}

// expandNode traverses the node hierarchy of a collapsed storage node and converts
// all fields and keys into expanded memory form.
func expandNode(hash hashNode, n node) node {
    switch n := n.(type) {
    case *rawShortNode:
        // Short nodes need key and child expansion
        return &shortNode{
            Key: compactToHex(n.Key),
            Val: expandNode(nil, n.Val),
            flags: nodeFlag{
                hash: hash,
            },
        }

    case rawFullNode:
        // Full nodes need child expansion
        node := &fullNode{
            flags: nodeFlag{
                hash: hash,
            },
        }
        for i := 0; i < len(node.Children); i++ {
            if n[i] != nil {
                node.Children[i] = expandNode(nil, n[i])
            }
        }
        return node

    case valueNode, hashNode:
        return n

    default:
        panic(fmt.Sprintf("unknown node type: %T", n))
    }
}

// NewDatabase creates a new trie database to store ephemeral trie content before
// it's written out to disk or garbage collected. No read cache is created, so all
// data retrievals will hit the underlying disk database.
func NewDatabase(diskdb kvstore.KeyValueStore) *Database {
    return NewDatabaseWithCache(diskdb, 0)
}

// NewDatabaseWithCache creates a new trie database to store ephemeral trie content
// before it's written out to disk or garbage collected. It also acts as a read
// cache for nodes loaded from disk.
func NewDatabaseWithCache(diskdb kvstore.KeyValueStore, cache int) *Database {
    var cleans *fastcache.Cache
    if cache > 0 {
        cleans = fastcache.New(cache * 1024 * 1024)
    }

    return &Database{
        diskdb: diskdb,
        cleans: cleans,
        dirties: map[types.Hash]*cachedNode{{}: {
            children: make(map[types.Hash]uint16),
        }},
        preimages: make(map[types.Hash][]byte),
    }
}
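// exampleDatabaseUsage is an illustrative sketch, not part of the original file,
// of the typical write path exposed by this layer: wrap a disk database, insert
// a reference counted blob, pin it under the meta-root so garbage collection
// keeps it, and finally flush it to disk. The function name and the 16 MiB
// clean-cache size are arbitrary; the caller is assumed to supply a working
// kvstore.KeyValueStore.
func exampleDatabaseUsage(diskdb kvstore.KeyValueStore, hash types.Hash, blob []byte) error {
    db := NewDatabaseWithCache(diskdb, 16) // 16 MiB clean cache for disk reads
    db.InsertBlob(hash, blob)              // track the blob in the dirty cache
    db.Reference(hash, types.Hash{})       // pin it under the meta-root
    return db.Commit(hash, false)          // persist the node (and preimages) to disk
}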
// DiskDB retrieves the persistent storage backing the trie database.
func (db *Database) DiskDB() kvstore.KeyValueStore {
    return db.diskdb
}

// InsertBlob writes a new reference tracked blob to the memory database if it's
// yet unknown. This method should only be used for non-trie nodes that require
// reference counting, since trie nodes are garbage collected directly through
// their embedded children.
func (db *Database) InsertBlob(hash types.Hash, blob []byte) {
    db.lock.Lock()
    defer db.lock.Unlock()

    db.insert(hash, blob, rawNode(blob))
}

// insert inserts a collapsed trie node into the memory database. This method is
// a more generic version of InsertBlob, supporting both raw blob insertions as
// well as trie node insertions. The blob must always be specified to allow proper
// size tracking.
func (db *Database) insert(hash types.Hash, blob []byte, node node) {
    // If the node's already cached, skip
    if _, ok := db.dirties[hash]; ok {
        return
    }

    // Create the cached entry for this node
    entry := &cachedNode{
        node:      simplifyNode(node),
        size:      uint16(len(blob)),
        flushPrev: db.newest,
    }
    for _, child := range entry.childs() {
        if c := db.dirties[child]; c != nil {
            c.parents++
        }
    }
    db.dirties[hash] = entry

    // Update the flush-list endpoints
    if db.oldest == (types.Hash{}) {
        db.oldest, db.newest = hash, hash
    } else {
        db.dirties[db.newest].flushNext, db.newest = hash, hash
    }
    db.dirtiesSize += types.StorageSize(types.HashLength + entry.size)
}

// insertPreimage writes a new trie node pre-image to the memory database if it's
// yet unknown. The method will make a copy of the slice.
//
// Note, this method assumes that the database's lock is held!
func (db *Database) insertPreimage(hash types.Hash, preimage []byte) {
    if _, ok := db.preimages[hash]; ok {
        return
    }
    db.preimages[hash] = hexutil.CopyBytes(preimage)
    db.preimagesSize += types.StorageSize(types.HashLength + len(preimage))
}

// node retrieves a cached trie node from memory, or returns nil if none can be
// found in the memory cache.
func (db *Database) node(hash types.Hash) node {
    // Retrieve the node from the clean cache if available
    if db.cleans != nil {
        if enc := db.cleans.Get(nil, hash[:]); enc != nil {
            return mustDecodeNode(hash[:], enc)
        }
    }
    // Retrieve the node from the dirty cache if available
    db.lock.RLock()
    dirty := db.dirties[hash]
    db.lock.RUnlock()

    if dirty != nil {
        return dirty.obj(hash)
    }

    // Content unavailable in memory, attempt to retrieve from disk
    enc, err := db.diskdb.Get(hash[:])
    if err != nil || enc == nil {
        return nil
    }
    if db.cleans != nil {
        db.cleans.Set(hash[:], enc)
    }
    return mustDecodeNode(hash[:], enc)
}
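// examplePreimageInsert is an illustrative sketch, not part of the original
// file, showing the locking contract stated on insertPreimage above: the
// database lock must be held by the caller before the preimage is recorded.
// The function name is hypothetical.
func examplePreimageInsert(db *Database, hash types.Hash, preimage []byte) {
    db.lock.Lock()
    db.insertPreimage(hash, preimage) // copies the slice, updates preimagesSize
    db.lock.Unlock()
}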
// Node retrieves an encoded cached trie node from memory. If it cannot be found
// cached, the method queries the persistent database for the content.
func (db *Database) Node(hash types.Hash) ([]byte, error) {
    // It doesn't make sense to retrieve the metaroot
    if hash == (types.Hash{}) {
        return nil, errors.New("not found")
    }
    // Retrieve the node from the clean cache if available
    if db.cleans != nil {
        if enc := db.cleans.Get(nil, hash[:]); enc != nil {
            return enc, nil
        }
    }
    // Retrieve the node from the dirty cache if available
    db.lock.RLock()
    dirty := db.dirties[hash]
    db.lock.RUnlock()

    if dirty != nil {
        return dirty.rlp(), nil
    }

    // Content unavailable in memory, attempt to retrieve from disk
    enc, err := db.diskdb.Get(hash[:])
    if err == nil && enc != nil {
        if db.cleans != nil {
            db.cleans.Set(hash[:], enc)
        }
    }
    return enc, err
}

// preimage retrieves a cached trie node pre-image from memory. If it cannot be
// found cached, the method queries the persistent database for the content.
func (db *Database) preimage(hash types.Hash) ([]byte, error) {
    // Retrieve the node from cache if available
    db.lock.RLock()
    preimage := db.preimages[hash]
    db.lock.RUnlock()

    if preimage != nil {
        return preimage, nil
    }
    // Content unavailable in memory, attempt to retrieve from disk
    return db.diskdb.Get(db.secureKey(hash[:]))
}

// secureKey returns the database key for the preimage of key, as an ephemeral
// buffer. The caller must not hold onto the return value because it will become
// invalid on the next call.
func (db *Database) secureKey(key []byte) []byte {
    buf := append(db.seckeybuf[:0], secureKeyPrefix...)
    buf = append(buf, key...)
    return buf
}

// Nodes retrieves the hashes of all the nodes cached within the memory database.
// This method is extremely expensive and should only be used to validate internal
// states in test code.
func (db *Database) Nodes() []types.Hash {
    db.lock.RLock()
    defer db.lock.RUnlock()

    var hashes = make([]types.Hash, 0, len(db.dirties))
    for hash := range db.dirties {
        if hash != (types.Hash{}) { // Special case for "root" references/nodes
            hashes = append(hashes, hash)
        }
    }
    return hashes
}

// Reference adds a new reference from a parent node to a child node.
func (db *Database) Reference(child types.Hash, parent types.Hash) {
    db.lock.Lock()
    defer db.lock.Unlock()

    db.reference(child, parent)
}

// reference is the private locked version of Reference.
func (db *Database) reference(child types.Hash, parent types.Hash) {
    // If the node does not exist, it's a node pulled from disk, skip
    node, ok := db.dirties[child]
    if !ok {
        return
    }
    // If the reference already exists, only duplicate for roots
    if db.dirties[parent].children == nil {
        db.dirties[parent].children = make(map[types.Hash]uint16)
        db.childrenSize += cachedNodeChildrenSize
    } else if _, ok = db.dirties[parent].children[child]; ok && parent != (types.Hash{}) {
        return
    }
    node.parents++
    db.dirties[parent].children[child]++
    if db.dirties[parent].children[child] == 1 {
        db.childrenSize += types.HashLength + 2 // uint16 counter
    }
}
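// exampleReadPath is an illustrative sketch, not part of the original file,
// of the read side of this layer: Node consults the clean cache, then the
// dirty cache, and finally the disk database, while the matching preimage is
// looked up through the secure-key prefix. The function name is hypothetical
// and the preimage may legitimately be nil if it was never recorded.
func exampleReadPath(db *Database, hash types.Hash) ([]byte, []byte, error) {
    blob, err := db.Node(hash) // clean cache -> dirty cache -> disk
    if err != nil {
        return nil, nil, err
    }
    pre, _ := db.preimage(hash) // secure-key-<hash> lookup, best effort
    return blob, pre, nil
}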
// Dereference removes an existing reference from a root node.
func (db *Database) Dereference(root types.Hash) {
    // Sanity check to ensure that the meta-root is not removed
    if root == (types.Hash{}) {
        logger().Error("Attempted to dereference the trie cache meta root")
        return
    }
    db.lock.Lock()
    defer db.lock.Unlock()

    nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
    db.dereference(root, types.Hash{})

    db.gcnodes += uint64(nodes - len(db.dirties))
    db.gcsize += storage - db.dirtiesSize
    db.gctime += time.Since(start)

    logger().Debug("Dereferenced trie from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start),
        "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)
}

// dereference is the private locked version of Dereference.
func (db *Database) dereference(child types.Hash, parent types.Hash) {
    // Dereference the parent-child
    node := db.dirties[parent]

    if node.children != nil && node.children[child] > 0 {
        node.children[child]--
        if node.children[child] == 0 {
            delete(node.children, child)
            db.childrenSize -= (types.HashLength + 2) // uint16 counter
        }
    }
    // If the child does not exist, it's a previously committed node.
    node, ok := db.dirties[child]
    if !ok {
        return
    }
    // If there are no more references to the child, delete it and cascade
    if node.parents > 0 {
        // This is a special cornercase where a node loaded from disk (i.e. not in the
        // memcache any more) gets reinjected as a new node (short node split into full,
        // then reverted into short), causing a cached node to have no parents. That is
        // no problem in itself, but don't make maxint parents out of it.
        node.parents--
    }
    if node.parents == 0 {
        // Remove the node from the flush-list
        switch child {
        case db.oldest:
            db.oldest = node.flushNext
            db.dirties[node.flushNext].flushPrev = types.Hash{}
        case db.newest:
            db.newest = node.flushPrev
            db.dirties[node.flushPrev].flushNext = types.Hash{}
        default:
            db.dirties[node.flushPrev].flushNext = node.flushNext
            db.dirties[node.flushNext].flushPrev = node.flushPrev
        }
        // Dereference all children and delete the node
        for _, hash := range node.childs() {
            db.dereference(hash, child)
        }
        delete(db.dirties, child)
        db.dirtiesSize -= types.StorageSize(types.HashLength + int(node.size))
        if node.children != nil {
            db.childrenSize -= cachedNodeChildrenSize
        }
    }
}
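// exampleRetainAndRelease is an illustrative sketch, not part of the original
// file, of the reference-counting lifecycle: a trie root is pinned with
// Reference so that flushing and garbage collection keep it alive, and later
// released with Dereference once that state is no longer needed, cascading
// deletion to any children that become unreferenced. The function name is
// hypothetical.
func exampleRetainAndRelease(db *Database, root types.Hash) {
    db.Reference(root, types.Hash{}) // pin the root under the meta-root
    // ... the trie rooted at `root` stays in the dirty cache while referenced ...
    db.Dereference(root) // drop the reference and garbage collect the subtree
}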
// Cap iteratively flushes old but still referenced trie nodes until the total
// memory usage goes below the given threshold.
//
// Note, this method is a non-synchronized mutator. It is unsafe to call this
// concurrently with other mutators.
func (db *Database) Cap(limit types.StorageSize) error {
    // Create a database batch to flush persistent data out. It is important that
    // outside code doesn't see an inconsistent state (referenced data removed from
    // memory cache during commit but not yet in persistent storage). This is ensured
    // by only uncaching existing data when the database write finalizes.
    nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
    batch := db.diskdb.NewBatch()

    // db.dirtiesSize only contains the useful data in the cache, but when reporting
    // the total memory consumption, the maintenance metadata also needs to be
    // counted.
    size := db.dirtiesSize + types.StorageSize((len(db.dirties)-1)*cachedNodeSize)
    size += db.childrenSize - types.StorageSize(len(db.dirties[types.Hash{}].children)*(types.HashLength+2))

    // If the preimage cache got large enough, push to disk. If it's still small,
    // leave it for later to deduplicate writes.
    flushPreimages := db.preimagesSize > 4*1024*1024
    if flushPreimages {
        for hash, preimage := range db.preimages {
            if err := batch.Put(db.secureKey(hash[:]), preimage); err != nil {
                logger().Error("Failed to commit preimage from trie database", "err", err)
                return err
            }
            if batch.ValueSize() > kvstore.IdealBatchSize {
                if err := batch.Write(); err != nil {
                    return err
                }
                batch.Reset()
            }
        }
    }
    // Keep committing nodes from the flush-list until we're below allowance
    oldest := db.oldest
    for size > limit && oldest != (types.Hash{}) {
        // Fetch the oldest referenced node and push into the batch
        node := db.dirties[oldest]
        if err := batch.Put(oldest[:], node.rlp()); err != nil {
            return err
        }
        // If we exceeded the ideal batch size, commit and reset
        if batch.ValueSize() >= kvstore.IdealBatchSize {
            if err := batch.Write(); err != nil {
                logger().Error("Failed to write flush list to disk", "err", err)
                return err
            }
            batch.Reset()
        }
        // Iterate to the next flush item, or abort if the size cap was achieved. Size
        // is the total size, including the useful cached data (hash -> blob), the
        // cache item metadata, as well as external children mappings.
        size -= types.StorageSize(types.HashLength + int(node.size) + cachedNodeSize)
        if node.children != nil {
            size -= types.StorageSize(cachedNodeChildrenSize + len(node.children)*(types.HashLength+2))
        }
        oldest = node.flushNext
    }
    // Flush out any remainder data from the last batch
    if err := batch.Write(); err != nil {
        logger().Error("Failed to write flush list to disk", "err", err)
        return err
    }
    // Write successful, clear out the flushed data
    db.lock.Lock()
    defer db.lock.Unlock()

    if flushPreimages {
        db.preimages = make(map[types.Hash][]byte)
        db.preimagesSize = 0
    }
    for db.oldest != oldest {
        node := db.dirties[db.oldest]
        delete(db.dirties, db.oldest)
        db.oldest = node.flushNext

        db.dirtiesSize -= types.StorageSize(types.HashLength + int(node.size))
        if node.children != nil {
            db.childrenSize -= types.StorageSize(cachedNodeChildrenSize + len(node.children)*(types.HashLength+2))
        }
    }
    if db.oldest != (types.Hash{}) {
        db.dirties[db.oldest].flushPrev = types.Hash{}
    }
    db.flushnodes += uint64(nodes - len(db.dirties))
    db.flushsize += storage - db.dirtiesSize
    db.flushtime += time.Since(start)

    logger().Debug("Persisted nodes from memory database", "nodes", nodes-len(db.dirties), "size", storage-db.dirtiesSize, "time", time.Since(start),
        "flushnodes", db.flushnodes, "flushsize", db.flushsize, "flushtime", db.flushtime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)

    return nil
}
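// exampleCapMemory is an illustrative sketch, not part of the original file,
// of how a caller keeps the in-memory node cache bounded: whenever the size
// reported by Size exceeds a budget, Cap flushes the oldest flush-list nodes
// to disk until usage drops below that budget. The function name and the
// 256 MiB budget are arbitrary values chosen for the example.
func exampleCapMemory(db *Database) error {
    const budget = 256 * 1024 * 1024 // bytes; illustrative threshold
    nodesSize, _ := db.Size()        // dirty node cache size (metadata included)
    if nodesSize > types.StorageSize(budget) {
        return db.Cap(types.StorageSize(budget))
    }
    return nil
}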
// Commit iterates over all the children of a particular node, writes them out
// to disk, forcefully tearing down all references in both directions. As a side
// effect, all pre-images accumulated up to this point are also written.
//
// Note, this method is a non-synchronized mutator. It is unsafe to call this
// concurrently with other mutators.
func (db *Database) Commit(node types.Hash, report bool) error {
    // Create a database batch to flush persistent data out. It is important that
    // outside code doesn't see an inconsistent state (referenced data removed from
    // memory cache during commit but not yet in persistent storage). This is ensured
    // by only uncaching existing data when the database write finalizes.
    start := time.Now()
    batch := db.diskdb.NewBatch()

    // Move all of the accumulated preimages into a write batch
    for hash, preimage := range db.preimages {
        if err := batch.Put(db.secureKey(hash[:]), preimage); err != nil {
            logger().Error("Failed to commit preimage from trie database", "err", err)
            return err
        }
        // If the batch is too large, flush to disk
        if batch.ValueSize() > kvstore.IdealBatchSize {
            if err := batch.Write(); err != nil {
                return err
            }
            batch.Reset()
        }
    }
    // Since we're going to replay trie node writes into the clean cache, flush out
    // any batched pre-images before continuing.
    if err := batch.Write(); err != nil {
        return err
    }
    batch.Reset()

    // Move the trie itself into the batch, flushing if enough data is accumulated
    nodes, storage := len(db.dirties), db.dirtiesSize

    uncacher := &cleaner{db}
    if err := db.commit(node, batch, uncacher); err != nil {
        logger().Error("Failed to commit trie from trie database", "err", err)
        return err
    }
    // Trie mostly committed to disk, flush any batch leftovers
    if err := batch.Write(); err != nil {
        logger().Error("Failed to write trie to disk", "err", err)
        return err
    }
    // Uncache any leftovers in the last batch
    db.lock.Lock()
    defer db.lock.Unlock()

    batch.Replay(uncacher)
    batch.Reset()

    // Reset the storage counters and bumped metrics
    db.preimages = make(map[types.Hash][]byte)
    db.preimagesSize = 0

    if report {
        logger().Debug("Persisted trie from memory database", "nodes", nodes-len(db.dirties)+int(db.flushnodes), "size", storage-db.dirtiesSize+db.flushsize, "time", time.Since(start)+db.flushtime,
            "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.dirties), "livesize", db.dirtiesSize)
    }

    // Reset the garbage collection statistics
    db.gcnodes, db.gcsize, db.gctime = 0, 0, 0
    db.flushnodes, db.flushsize, db.flushtime = 0, 0, 0

    return nil
}

// commit is the private locked version of Commit.
func (db *Database) commit(hash types.Hash, batch kvstore.Batch, uncacher *cleaner) error {
    // If the node does not exist, it's a previously committed node
    node, ok := db.dirties[hash]
    if !ok {
        return nil
    }
    for _, child := range node.childs() {
        if err := db.commit(child, batch, uncacher); err != nil {
            return err
        }
    }
    if err := batch.Put(hash[:], node.rlp()); err != nil {
        return err
    }
    // If we've reached an optimal batch size, commit and start over
    if batch.ValueSize() >= kvstore.IdealBatchSize {
        if err := batch.Write(); err != nil {
            return err
        }
        db.lock.Lock()
        batch.Replay(uncacher)
        batch.Reset()
        db.lock.Unlock()
    }
    return nil
}

// cleaner is a database batch replayer that takes a batch of write operations
// and cleans up the trie database from anything written to disk.
type cleaner struct {
    db *Database
}
// Put reacts to database writes and implements dirty data uncaching. This is the
// post-processing step of a commit operation where the already persisted trie is
// removed from the dirty cache and moved into the clean cache. The reason behind
// the two-phase commit is to ensure data availability while moving from memory
// to disk.
func (c *cleaner) Put(key []byte, rlp []byte) error {
    hash := types.BytesToHash(key)

    // If the node does not exist, we're done on this path
    node, ok := c.db.dirties[hash]
    if !ok {
        return nil
    }
    // Node still exists, remove it from the flush-list
    switch hash {
    case c.db.oldest:
        c.db.oldest = node.flushNext
        c.db.dirties[node.flushNext].flushPrev = types.Hash{}
    case c.db.newest:
        c.db.newest = node.flushPrev
        c.db.dirties[node.flushPrev].flushNext = types.Hash{}
    default:
        c.db.dirties[node.flushPrev].flushNext = node.flushNext
        c.db.dirties[node.flushNext].flushPrev = node.flushPrev
    }
    // Remove the node from the dirty cache
    delete(c.db.dirties, hash)
    c.db.dirtiesSize -= types.StorageSize(types.HashLength + int(node.size))
    if node.children != nil {
        c.db.dirtiesSize -= types.StorageSize(cachedNodeChildrenSize + len(node.children)*(types.HashLength+2))
    }
    // Move the flushed node into the clean cache to prevent insta-reloads
    if c.db.cleans != nil {
        c.db.cleans.Set(hash[:], rlp)
    }
    return nil
}

func (c *cleaner) Delete(key []byte) error {
    panic("not implemented")
}

// Size returns the current storage size of the memory cache in front of the
// persistent database layer.
func (db *Database) Size() (types.StorageSize, types.StorageSize) {
    db.lock.RLock()
    defer db.lock.RUnlock()

    // db.dirtiesSize only contains the useful data in the cache, but when reporting
    // the total memory consumption, the maintenance metadata also needs to be
    // counted.
    var metadataSize = types.StorageSize((len(db.dirties) - 1) * cachedNodeSize)
    var metarootRefs = types.StorageSize(len(db.dirties[types.Hash{}].children) * (types.HashLength + 2))
    return db.dirtiesSize + db.childrenSize + metadataSize - metarootRefs, db.preimagesSize
}