// Origin: github.1485827954.workers.dev/ethereum/go-ethereum@v1.14.3/core/state/snapshot/snapshot.go

// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package snapshot implements a journalled, dynamic state dump.
package snapshot

import (
	"bytes"
	"errors"
	"fmt"
	"sync"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
	"github.com/ethereum/go-ethereum/rlp"
	"github.com/ethereum/go-ethereum/triedb"
)

// Metrics below are all registered under the "state/snapshot/" namespace.
var (
	snapshotCleanAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/account/hit", nil)
	snapshotCleanAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/miss", nil)
	snapshotCleanAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/inex", nil)
	snapshotCleanAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/account/read", nil)
	snapshotCleanAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/account/write", nil)

	snapshotCleanStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/clean/storage/hit", nil)
	snapshotCleanStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/miss", nil)
	snapshotCleanStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/inex", nil)
	snapshotCleanStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/clean/storage/read", nil)
	snapshotCleanStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/clean/storage/write", nil)

	snapshotDirtyAccountHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/account/hit", nil)
	snapshotDirtyAccountMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/miss", nil)
	snapshotDirtyAccountInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/inex", nil)
	snapshotDirtyAccountReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/account/read", nil)
	snapshotDirtyAccountWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/account/write", nil)

	snapshotDirtyStorageHitMeter   = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/hit", nil)
	snapshotDirtyStorageMissMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/miss", nil)
	snapshotDirtyStorageInexMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/inex", nil)
	snapshotDirtyStorageReadMeter  = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/read", nil)
	snapshotDirtyStorageWriteMeter = metrics.NewRegisteredMeter("state/snapshot/dirty/storage/write", nil)

	snapshotDirtyAccountHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/account/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))
	snapshotDirtyStorageHitDepthHist = metrics.NewRegisteredHistogram("state/snapshot/dirty/storage/hit/depth", nil, metrics.NewExpDecaySample(1028, 0.015))

	snapshotFlushAccountItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/item", nil)
	snapshotFlushAccountSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/account/size", nil)
	snapshotFlushStorageItemMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/item", nil)
	snapshotFlushStorageSizeMeter = metrics.NewRegisteredMeter("state/snapshot/flush/storage/size", nil)

	snapshotBloomIndexTimer = metrics.NewRegisteredResettingTimer("state/snapshot/bloom/index", nil)
	snapshotBloomErrorGauge = metrics.NewRegisteredGaugeFloat64("state/snapshot/bloom/error", nil)

	snapshotBloomAccountTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/account/truehit", nil)
	snapshotBloomAccountFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/account/falsehit", nil)
	snapshotBloomAccountMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/account/miss", nil)

	snapshotBloomStorageTrueHitMeter  = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/truehit", nil)
	snapshotBloomStorageFalseHitMeter = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/falsehit", nil)
	snapshotBloomStorageMissMeter     = metrics.NewRegisteredMeter("state/snapshot/bloom/storage/miss", nil)

	// ErrSnapshotStale is returned from data accessors if the underlying snapshot
	// layer had been invalidated due to the chain progressing forward far enough
	// to not maintain the layer's original state.
	ErrSnapshotStale = errors.New("snapshot stale")

	// ErrNotCoveredYet is returned from data accessors if the underlying snapshot
	// is being generated currently and the requested data item is not yet in the
	// range of accounts covered.
	ErrNotCoveredYet = errors.New("not covered yet")

	// ErrNotConstructed is returned if the callers want to iterate the snapshot
	// while the generation is not finished yet.
	ErrNotConstructed = errors.New("snapshot is not constructed")

	// errSnapshotCycle is returned if a snapshot is attempted to be inserted
	// that forms a cycle in the snapshot tree.
	errSnapshotCycle = errors.New("snapshot cycle")
)

// Snapshot represents the functionality supported by a snapshot storage layer.
type Snapshot interface {
	// Root returns the root hash for which this snapshot was made.
	Root() common.Hash

	// Account directly retrieves the account associated with a particular hash in
	// the snapshot slim data format.
	Account(hash common.Hash) (*types.SlimAccount, error)

	// AccountRLP directly retrieves the account RLP associated with a particular
	// hash in the snapshot slim data format.
	AccountRLP(hash common.Hash) ([]byte, error)

	// Storage directly retrieves the storage data associated with a particular hash,
	// within a particular account.
	Storage(accountHash, storageHash common.Hash) ([]byte, error)
}

// snapshot is the internal version of the snapshot data layer that supports some
// additional methods compared to the public API.
type snapshot interface {
	Snapshot

	// Parent returns the subsequent layer of a snapshot, or nil if the base was
	// reached.
	//
	// Note, the method is an internal helper to avoid type switching between the
	// disk and diff layers. There is no locking involved.
	Parent() snapshot

	// Update creates a new layer on top of the existing snapshot diff tree with
	// the specified data items.
	//
	// Note, the maps are retained by the method to avoid copying everything.
	Update(blockRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) *diffLayer

	// Journal commits an entire diff hierarchy to disk into a single journal entry.
	// This is meant to be used during shutdown to persist the snapshot without
	// flattening everything down (bad for reorgs).
	Journal(buffer *bytes.Buffer) (common.Hash, error)

	// Stale returns whether this layer has become stale (was flattened across) or
	// if it's still live.
	Stale() bool

	// AccountIterator creates an account iterator over an arbitrary layer.
	AccountIterator(seek common.Hash) AccountIterator

	// StorageIterator creates a storage iterator over an arbitrary layer.
	StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool)
}

// Config includes the configurations for snapshots.
type Config struct {
	CacheSize  int  // Megabytes permitted to use for read caches
	Recovery   bool // Indicator that the snapshot is in recovery mode
	NoBuild    bool // Indicator that snapshot generation is disallowed
	AsyncBuild bool // The snapshot generation is allowed to be constructed asynchronously
}

// Tree is an Ethereum state snapshot tree. It consists of one persistent base
// layer backed by a key-value store, on top of which arbitrarily many in-memory
// diff layers are topped. The memory diffs can form a tree with branching, but
// the disk layer is singleton and common to all. If a reorg goes deeper than the
// disk layer, everything needs to be deleted.
//
// The goal of a state snapshot is twofold: to allow direct access to account and
// storage data to avoid expensive multi-level trie lookups; and to allow sorted,
// cheap iteration of the account/storage tries for sync aid.
type Tree struct {
	config Config                   // Snapshots configurations
	diskdb ethdb.KeyValueStore      // Persistent database to store the snapshot
	triedb *triedb.Database         // In-memory cache to access the trie through
	layers map[common.Hash]snapshot // Collection of all known layers
	lock   sync.RWMutex             // Held by the Tree methods while they read or replace layers

	// Test hooks
	onFlatten func() // Hook invoked when the bottom most diff layers are flattened
}

// New attempts to load an already existing snapshot from a persistent key-value
// store (with a number of memory layers from a journal), ensuring that the head
// of the snapshot matches the expected one.
//
// If the snapshot is missing or the disk layer is broken, the snapshot will be
// reconstructed using both the existing data and the state trie.
// The repair happens on a background thread.
//
// If the memory layers in the journal do not match the disk layer (e.g. there is
// a gap) or the journal is missing, there are two repair cases:
//
//   - if the 'recovery' flag (config.Recovery) is true, memory diff-layers and
//     the disk-layer will all be kept. This case happens when the snapshot is
//     'ahead' of the state trie.
//   - otherwise, the entire snapshot is considered invalid and will be recreated on
//     a background thread.
195 func New(config Config, diskdb ethdb.KeyValueStore, triedb *triedb.Database, root common.Hash) (*Tree, error) { 196 // Create a new, empty snapshot tree 197 snap := &Tree{ 198 config: config, 199 diskdb: diskdb, 200 triedb: triedb, 201 layers: make(map[common.Hash]snapshot), 202 } 203 // Attempt to load a previously persisted snapshot and rebuild one if failed 204 head, disabled, err := loadSnapshot(diskdb, triedb, root, config.CacheSize, config.Recovery, config.NoBuild) 205 if disabled { 206 log.Warn("Snapshot maintenance disabled (syncing)") 207 return snap, nil 208 } 209 // Create the building waiter iff the background generation is allowed 210 if !config.NoBuild && !config.AsyncBuild { 211 defer snap.waitBuild() 212 } 213 if err != nil { 214 log.Warn("Failed to load snapshot", "err", err) 215 if !config.NoBuild { 216 snap.Rebuild(root) 217 return snap, nil 218 } 219 return nil, err // Bail out the error, don't rebuild automatically. 220 } 221 // Existing snapshot loaded, seed all the layers 222 for head != nil { 223 snap.layers[head.Root()] = head 224 head = head.Parent() 225 } 226 return snap, nil 227 } 228 229 // waitBuild blocks until the snapshot finishes rebuilding. This method is meant 230 // to be used by tests to ensure we're testing what we believe we are. 231 func (t *Tree) waitBuild() { 232 // Find the rebuild termination channel 233 var done chan struct{} 234 235 t.lock.RLock() 236 for _, layer := range t.layers { 237 if layer, ok := layer.(*diskLayer); ok { 238 done = layer.genPending 239 break 240 } 241 } 242 t.lock.RUnlock() 243 244 // Wait until the snapshot is generated 245 if done != nil { 246 <-done 247 } 248 } 249 250 // Disable interrupts any pending snapshot generator, deletes all the snapshot 251 // layers in memory and marks snapshots disabled globally. In order to resume 252 // the snapshot functionality, the caller must invoke Rebuild. 
253 func (t *Tree) Disable() { 254 // Interrupt any live snapshot layers 255 t.lock.Lock() 256 defer t.lock.Unlock() 257 258 for _, layer := range t.layers { 259 switch layer := layer.(type) { 260 case *diskLayer: 261 262 layer.lock.RLock() 263 generating := layer.genMarker != nil 264 layer.lock.RUnlock() 265 if !generating { 266 // Generator is already aborted or finished 267 break 268 } 269 // If the base layer is generating, abort it 270 if layer.genAbort != nil { 271 abort := make(chan *generatorStats) 272 layer.genAbort <- abort 273 <-abort 274 } 275 // Layer should be inactive now, mark it as stale 276 layer.lock.Lock() 277 layer.stale = true 278 layer.lock.Unlock() 279 280 case *diffLayer: 281 // If the layer is a simple diff, simply mark as stale 282 layer.lock.Lock() 283 layer.stale.Store(true) 284 layer.lock.Unlock() 285 286 default: 287 panic(fmt.Sprintf("unknown layer type: %T", layer)) 288 } 289 } 290 t.layers = map[common.Hash]snapshot{} 291 292 // Delete all snapshot liveness information from the database 293 batch := t.diskdb.NewBatch() 294 295 rawdb.WriteSnapshotDisabled(batch) 296 rawdb.DeleteSnapshotRoot(batch) 297 rawdb.DeleteSnapshotJournal(batch) 298 rawdb.DeleteSnapshotGenerator(batch) 299 rawdb.DeleteSnapshotRecoveryNumber(batch) 300 // Note, we don't delete the sync progress 301 302 if err := batch.Write(); err != nil { 303 log.Crit("Failed to disable snapshots", "err", err) 304 } 305 } 306 307 // Snapshot retrieves a snapshot belonging to the given block root, or nil if no 308 // snapshot is maintained for that block. 309 func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot { 310 t.lock.RLock() 311 defer t.lock.RUnlock() 312 313 return t.layers[blockRoot] 314 } 315 316 // Snapshots returns all visited layers from the topmost layer with specific 317 // root and traverses downward. The layer amount is limited by the given number. 318 // If nodisk is set, then disk layer is excluded. 
319 func (t *Tree) Snapshots(root common.Hash, limits int, nodisk bool) []Snapshot { 320 t.lock.RLock() 321 defer t.lock.RUnlock() 322 323 if limits == 0 { 324 return nil 325 } 326 layer := t.layers[root] 327 if layer == nil { 328 return nil 329 } 330 var ret []Snapshot 331 for { 332 if _, isdisk := layer.(*diskLayer); isdisk && nodisk { 333 break 334 } 335 ret = append(ret, layer) 336 limits -= 1 337 if limits == 0 { 338 break 339 } 340 parent := layer.Parent() 341 if parent == nil { 342 break 343 } 344 layer = parent 345 } 346 return ret 347 } 348 349 // Update adds a new snapshot into the tree, if that can be linked to an existing 350 // old parent. It is disallowed to insert a disk layer (the origin of all). 351 func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error { 352 // Reject noop updates to avoid self-loops in the snapshot tree. This is a 353 // special case that can only happen for Clique networks where empty blocks 354 // don't modify the state (0 block subsidy). 355 // 356 // Although we could silently ignore this internally, it should be the caller's 357 // responsibility to avoid even attempting to insert such a snapshot. 358 if blockRoot == parentRoot { 359 return errSnapshotCycle 360 } 361 // Generate a new snapshot on top of the parent 362 parent := t.Snapshot(parentRoot) 363 if parent == nil { 364 return fmt.Errorf("parent [%#x] snapshot missing", parentRoot) 365 } 366 snap := parent.(snapshot).Update(blockRoot, destructs, accounts, storage) 367 368 // Save the new snapshot for later 369 t.lock.Lock() 370 defer t.lock.Unlock() 371 372 t.layers[snap.root] = snap 373 return nil 374 } 375 376 // Cap traverses downwards the snapshot tree from a head block hash until the 377 // number of allowed layers are crossed. All layers beyond the permitted number 378 // are flattened downwards. 
//
// Note, the final diff layer count in general will be one more than the amount
// requested. This happens because the bottom-most diff layer is the accumulator
// which may or may not overflow and cascade to disk. Since this last layer's
// survival is only known *after* capping, we need to omit it from the count if
// we want to ensure that *at least* the requested number of diff layers remain.
//
// An error is returned if no snapshot exists for root or if root refers to the
// disk layer itself.
func (t *Tree) Cap(root common.Hash, layers int) error {
	// Retrieve the head snapshot to cap from
	snap := t.Snapshot(root)
	if snap == nil {
		return fmt.Errorf("snapshot [%#x] missing", root)
	}
	diff, ok := snap.(*diffLayer)
	if !ok {
		return fmt.Errorf("snapshot [%#x] is disk layer", root)
	}
	// If the generator is still running, use a more aggressive cap
	diff.origin.lock.RLock()
	if diff.origin.genMarker != nil && layers > 8 {
		layers = 8
	}
	diff.origin.lock.RUnlock()

	// Run the internal capping and discard all stale layers
	t.lock.Lock()
	defer t.lock.Unlock()

	// Flattening the bottom-most diff layer requires special casing since there's
	// no child to rewire to the grandparent. In that case we can fake a temporary
	// child for the capping and then remove it.
	if layers == 0 {
		// If full commit was requested, flatten the diffs and merge onto disk
		diff.lock.RLock()
		base := diffToDisk(diff.flatten().(*diffLayer))
		diff.lock.RUnlock()

		// Replace the entire snapshot tree with the flat base
		t.layers = map[common.Hash]snapshot{base.root: base}
		return nil
	}
	persisted := t.cap(diff, layers)

	// Remove any layer that is stale or links into a stale layer. First index
	// every diff layer under its parent's root so subtrees can be walked.
	children := make(map[common.Hash][]common.Hash)
	for root, snap := range t.layers {
		if diff, ok := snap.(*diffLayer); ok {
			parent := diff.parent.Root()
			children[parent] = append(children[parent], root)
		}
	}
	// remove drops the given layer together with all of its descendants.
	var remove func(root common.Hash)
	remove = func(root common.Hash) {
		delete(t.layers, root)
		for _, child := range children[root] {
			remove(child)
		}
		delete(children, root)
	}
	for root, snap := range t.layers {
		if snap.Stale() {
			remove(root)
		}
	}
	// If the disk layer was modified, regenerate all the cumulative blooms
	if persisted != nil {
		// rebloom walks the surviving subtree rooted at the new disk layer and
		// calls rebloom(persisted) on every diff layer it encounters.
		var rebloom func(root common.Hash)
		rebloom = func(root common.Hash) {
			if diff, ok := t.layers[root].(*diffLayer); ok {
				diff.rebloom(persisted)
			}
			for _, child := range children[root] {
				rebloom(child)
			}
		}
		rebloom(persisted.root)
	}
	return nil
}

// cap traverses downwards the diff tree until the number of allowed layers are
// crossed. All diffs beyond the permitted number are flattened downwards. If the
// layer limit is reached, memory cap is also enforced (but not before).
//
// The method returns the new disk layer if diffs were persisted into it.
//
// Note, the final diff layer count in general will be one more than the amount
// requested. This happens because the bottom-most diff layer is the accumulator
// which may or may not overflow and cascade to disk.
Since this last layer's 467 // survival is only known *after* capping, we need to omit it from the count if 468 // we want to ensure that *at least* the requested number of diff layers remain. 469 func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer { 470 // Dive until we run out of layers or reach the persistent database 471 for i := 0; i < layers-1; i++ { 472 // If we still have diff layers below, continue down 473 if parent, ok := diff.parent.(*diffLayer); ok { 474 diff = parent 475 } else { 476 // Diff stack too shallow, return without modifications 477 return nil 478 } 479 } 480 // We're out of layers, flatten anything below, stopping if it's the disk or if 481 // the memory limit is not yet exceeded. 482 switch parent := diff.parent.(type) { 483 case *diskLayer: 484 return nil 485 486 case *diffLayer: 487 // Hold the write lock until the flattened parent is linked correctly. 488 // Otherwise, the stale layer may be accessed by external reads in the 489 // meantime. 490 diff.lock.Lock() 491 defer diff.lock.Unlock() 492 493 // Flatten the parent into the grandparent. The flattening internally obtains a 494 // write lock on grandparent. 495 flattened := parent.flatten().(*diffLayer) 496 t.layers[flattened.root] = flattened 497 498 // Invoke the hook if it's registered. Ugly hack. 499 if t.onFlatten != nil { 500 t.onFlatten() 501 } 502 diff.parent = flattened 503 if flattened.memory < aggregatorMemoryLimit { 504 // Accumulator layer is smaller than the limit, so we can abort, unless 505 // there's a snapshot being generated currently. In that case, the trie 506 // will move from underneath the generator so we **must** merge all the 507 // partial data down into the snapshot and restart the generation. 
508 if flattened.parent.(*diskLayer).genAbort == nil { 509 return nil 510 } 511 } 512 default: 513 panic(fmt.Sprintf("unknown data layer: %T", parent)) 514 } 515 // If the bottom-most layer is larger than our memory cap, persist to disk 516 bottom := diff.parent.(*diffLayer) 517 518 bottom.lock.RLock() 519 base := diffToDisk(bottom) 520 bottom.lock.RUnlock() 521 522 t.layers[base.root] = base 523 diff.parent = base 524 return base 525 } 526 527 // diffToDisk merges a bottom-most diff into the persistent disk layer underneath 528 // it. The method will panic if called onto a non-bottom-most diff layer. 529 // 530 // The disk layer persistence should be operated in an atomic way. All updates should 531 // be discarded if the whole transition if not finished. 532 func diffToDisk(bottom *diffLayer) *diskLayer { 533 var ( 534 base = bottom.parent.(*diskLayer) 535 batch = base.diskdb.NewBatch() 536 stats *generatorStats 537 ) 538 // If the disk layer is running a snapshot generator, abort it 539 if base.genAbort != nil { 540 abort := make(chan *generatorStats) 541 base.genAbort <- abort 542 stats = <-abort 543 } 544 // Put the deletion in the batch writer, flush all updates in the final step. 
545 rawdb.DeleteSnapshotRoot(batch) 546 547 // Mark the original base as stale as we're going to create a new wrapper 548 base.lock.Lock() 549 if base.stale { 550 panic("parent disk layer is stale") // we've committed into the same base from two children, boo 551 } 552 base.stale = true 553 base.lock.Unlock() 554 555 // Destroy all the destructed accounts from the database 556 for hash := range bottom.destructSet { 557 // Skip any account not covered yet by the snapshot 558 if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 { 559 continue 560 } 561 // Remove all storage slots 562 rawdb.DeleteAccountSnapshot(batch, hash) 563 base.cache.Set(hash[:], nil) 564 565 it := rawdb.IterateStorageSnapshots(base.diskdb, hash) 566 for it.Next() { 567 key := it.Key() 568 batch.Delete(key) 569 base.cache.Del(key[1:]) 570 snapshotFlushStorageItemMeter.Mark(1) 571 572 // Ensure we don't delete too much data blindly (contract can be 573 // huge). It's ok to flush, the root will go missing in case of a 574 // crash and we'll detect and regenerate the snapshot. 575 if batch.ValueSize() > 64*1024*1024 { 576 if err := batch.Write(); err != nil { 577 log.Crit("Failed to write storage deletions", "err", err) 578 } 579 batch.Reset() 580 } 581 } 582 it.Release() 583 } 584 // Push all updated accounts into the database 585 for hash, data := range bottom.accountData { 586 // Skip any account not covered yet by the snapshot 587 if base.genMarker != nil && bytes.Compare(hash[:], base.genMarker) > 0 { 588 continue 589 } 590 // Push the account to disk 591 rawdb.WriteAccountSnapshot(batch, hash, data) 592 base.cache.Set(hash[:], data) 593 snapshotCleanAccountWriteMeter.Mark(int64(len(data))) 594 595 snapshotFlushAccountItemMeter.Mark(1) 596 snapshotFlushAccountSizeMeter.Mark(int64(len(data))) 597 598 // Ensure we don't write too much data blindly. It's ok to flush, the 599 // root will go missing in case of a crash and we'll detect and regen 600 // the snapshot. 
601 if batch.ValueSize() > 64*1024*1024 { 602 if err := batch.Write(); err != nil { 603 log.Crit("Failed to write storage deletions", "err", err) 604 } 605 batch.Reset() 606 } 607 } 608 // Push all the storage slots into the database 609 for accountHash, storage := range bottom.storageData { 610 // Skip any account not covered yet by the snapshot 611 if base.genMarker != nil && bytes.Compare(accountHash[:], base.genMarker) > 0 { 612 continue 613 } 614 // Generation might be mid-account, track that case too 615 midAccount := base.genMarker != nil && bytes.Equal(accountHash[:], base.genMarker[:common.HashLength]) 616 617 for storageHash, data := range storage { 618 // Skip any slot not covered yet by the snapshot 619 if midAccount && bytes.Compare(storageHash[:], base.genMarker[common.HashLength:]) > 0 { 620 continue 621 } 622 if len(data) > 0 { 623 rawdb.WriteStorageSnapshot(batch, accountHash, storageHash, data) 624 base.cache.Set(append(accountHash[:], storageHash[:]...), data) 625 snapshotCleanStorageWriteMeter.Mark(int64(len(data))) 626 } else { 627 rawdb.DeleteStorageSnapshot(batch, accountHash, storageHash) 628 base.cache.Set(append(accountHash[:], storageHash[:]...), nil) 629 } 630 snapshotFlushStorageItemMeter.Mark(1) 631 snapshotFlushStorageSizeMeter.Mark(int64(len(data))) 632 } 633 } 634 // Update the snapshot block marker and write any remainder data 635 rawdb.WriteSnapshotRoot(batch, bottom.root) 636 637 // Write out the generator progress marker and report 638 journalProgress(batch, base.genMarker, stats) 639 640 // Flush all the updates in the single db operation. Ensure the 641 // disk layer transition is atomic. 
642 if err := batch.Write(); err != nil { 643 log.Crit("Failed to write leftover snapshot", "err", err) 644 } 645 log.Debug("Journalled disk layer", "root", bottom.root, "complete", base.genMarker == nil) 646 res := &diskLayer{ 647 root: bottom.root, 648 cache: base.cache, 649 diskdb: base.diskdb, 650 triedb: base.triedb, 651 genMarker: base.genMarker, 652 genPending: base.genPending, 653 } 654 // If snapshot generation hasn't finished yet, port over all the starts and 655 // continue where the previous round left off. 656 // 657 // Note, the `base.genAbort` comparison is not used normally, it's checked 658 // to allow the tests to play with the marker without triggering this path. 659 if base.genMarker != nil && base.genAbort != nil { 660 res.genMarker = base.genMarker 661 res.genAbort = make(chan chan *generatorStats) 662 go res.generate(stats) 663 } 664 return res 665 } 666 667 // Release releases resources 668 func (t *Tree) Release() { 669 if dl := t.disklayer(); dl != nil { 670 dl.Release() 671 } 672 } 673 674 // Journal commits an entire diff hierarchy to disk into a single journal entry. 675 // This is meant to be used during shutdown to persist the snapshot without 676 // flattening everything down (bad for reorgs). 677 // 678 // The method returns the root hash of the base layer that needs to be persisted 679 // to disk as a trie too to allow continuing any pending generation op. 
680 func (t *Tree) Journal(root common.Hash) (common.Hash, error) { 681 // Retrieve the head snapshot to journal from var snap snapshot 682 snap := t.Snapshot(root) 683 if snap == nil { 684 return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root) 685 } 686 // Run the journaling 687 t.lock.Lock() 688 defer t.lock.Unlock() 689 690 // Firstly write out the metadata of journal 691 journal := new(bytes.Buffer) 692 if err := rlp.Encode(journal, journalVersion); err != nil { 693 return common.Hash{}, err 694 } 695 diskroot := t.diskRoot() 696 if diskroot == (common.Hash{}) { 697 return common.Hash{}, errors.New("invalid disk root") 698 } 699 // Secondly write out the disk layer root, ensure the 700 // diff journal is continuous with disk. 701 if err := rlp.Encode(journal, diskroot); err != nil { 702 return common.Hash{}, err 703 } 704 // Finally write out the journal of each layer in reverse order. 705 base, err := snap.(snapshot).Journal(journal) 706 if err != nil { 707 return common.Hash{}, err 708 } 709 // Store the journal into the database and return 710 rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes()) 711 return base, nil 712 } 713 714 // Rebuild wipes all available snapshot data from the persistent database and 715 // discard all caches and diff layers. Afterwards, it starts a new snapshot 716 // generator with the given root hash. 717 func (t *Tree) Rebuild(root common.Hash) { 718 t.lock.Lock() 719 defer t.lock.Unlock() 720 721 // Firstly delete any recovery flag in the database. Because now we are 722 // building a brand new snapshot. Also reenable the snapshot feature. 
723 rawdb.DeleteSnapshotRecoveryNumber(t.diskdb) 724 rawdb.DeleteSnapshotDisabled(t.diskdb) 725 726 // Iterate over and mark all layers stale 727 for _, layer := range t.layers { 728 switch layer := layer.(type) { 729 case *diskLayer: 730 // If the base layer is generating, abort it and save 731 if layer.genAbort != nil { 732 abort := make(chan *generatorStats) 733 layer.genAbort <- abort 734 <-abort 735 } 736 // Layer should be inactive now, mark it as stale 737 layer.lock.Lock() 738 layer.stale = true 739 layer.lock.Unlock() 740 741 case *diffLayer: 742 // If the layer is a simple diff, simply mark as stale 743 layer.lock.Lock() 744 layer.stale.Store(true) 745 layer.lock.Unlock() 746 747 default: 748 panic(fmt.Sprintf("unknown layer type: %T", layer)) 749 } 750 } 751 // Start generating a new snapshot from scratch on a background thread. The 752 // generator will run a wiper first if there's not one running right now. 753 log.Info("Rebuilding state snapshot") 754 t.layers = map[common.Hash]snapshot{ 755 root: generateSnapshot(t.diskdb, t.triedb, t.config.CacheSize, root), 756 } 757 } 758 759 // AccountIterator creates a new account iterator for the specified root hash and 760 // seeks to a starting account hash. 761 func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) { 762 ok, err := t.generating() 763 if err != nil { 764 return nil, err 765 } 766 if ok { 767 return nil, ErrNotConstructed 768 } 769 return newFastAccountIterator(t, root, seek) 770 } 771 772 // StorageIterator creates a new storage iterator for the specified root hash and 773 // account. The iterator will be move to the specific start position. 
774 func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) { 775 ok, err := t.generating() 776 if err != nil { 777 return nil, err 778 } 779 if ok { 780 return nil, ErrNotConstructed 781 } 782 return newFastStorageIterator(t, root, account, seek) 783 } 784 785 // Verify iterates the whole state(all the accounts as well as the corresponding storages) 786 // with the specific root and compares the re-computed hash with the original one. 787 func (t *Tree) Verify(root common.Hash) error { 788 acctIt, err := t.AccountIterator(root, common.Hash{}) 789 if err != nil { 790 return err 791 } 792 defer acctIt.Release() 793 794 got, err := generateTrieRoot(nil, "", acctIt, common.Hash{}, stackTrieGenerate, func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) { 795 storageIt, err := t.StorageIterator(root, accountHash, common.Hash{}) 796 if err != nil { 797 return common.Hash{}, err 798 } 799 defer storageIt.Release() 800 801 hash, err := generateTrieRoot(nil, "", storageIt, accountHash, stackTrieGenerate, nil, stat, false) 802 if err != nil { 803 return common.Hash{}, err 804 } 805 return hash, nil 806 }, newGenerateStats(), true) 807 808 if err != nil { 809 return err 810 } 811 if got != root { 812 return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root) 813 } 814 return nil 815 } 816 817 // disklayer is an internal helper function to return the disk layer. 818 // The lock of snapTree is assumed to be held already. 
819 func (t *Tree) disklayer() *diskLayer { 820 var snap snapshot 821 for _, s := range t.layers { 822 snap = s 823 break 824 } 825 if snap == nil { 826 return nil 827 } 828 switch layer := snap.(type) { 829 case *diskLayer: 830 return layer 831 case *diffLayer: 832 return layer.origin 833 default: 834 panic(fmt.Sprintf("%T: undefined layer", snap)) 835 } 836 } 837 838 // diskRoot is an internal helper function to return the disk layer root. 839 // The lock of snapTree is assumed to be held already. 840 func (t *Tree) diskRoot() common.Hash { 841 disklayer := t.disklayer() 842 if disklayer == nil { 843 return common.Hash{} 844 } 845 return disklayer.Root() 846 } 847 848 // generating is an internal helper function which reports whether the snapshot 849 // is still under the construction. 850 func (t *Tree) generating() (bool, error) { 851 t.lock.Lock() 852 defer t.lock.Unlock() 853 854 layer := t.disklayer() 855 if layer == nil { 856 return false, errors.New("disk layer is missing") 857 } 858 layer.lock.RLock() 859 defer layer.lock.RUnlock() 860 return layer.genMarker != nil, nil 861 } 862 863 // DiskRoot is a external helper function to return the disk layer root. 864 func (t *Tree) DiskRoot() common.Hash { 865 t.lock.Lock() 866 defer t.lock.Unlock() 867 868 return t.diskRoot() 869 } 870 871 // Size returns the memory usage of the diff layers above the disk layer and the 872 // dirty nodes buffered in the disk layer. Currently, the implementation uses a 873 // special diff layer (the first) as an aggregator simulating a dirty buffer, so 874 // the second return will always be 0. However, this will be made consistent with 875 // the pathdb, which will require a second return. 
876 func (t *Tree) Size() (diffs common.StorageSize, buf common.StorageSize) { 877 t.lock.RLock() 878 defer t.lock.RUnlock() 879 880 var size common.StorageSize 881 for _, layer := range t.layers { 882 if layer, ok := layer.(*diffLayer); ok { 883 size += common.StorageSize(layer.memory) 884 } 885 } 886 return size, 0 887 }