github.com/fff-chain/go-fff@v0.0.0-20220726032732-1c84420b8a99/core/state/pruner/pruner.go

// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package pruner

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"math"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/prometheus/tsdb/fileutil"

	"github.com/fff-chain/go-fff/common"
	"github.com/fff-chain/go-fff/consensus"
	"github.com/fff-chain/go-fff/core/rawdb"
	"github.com/fff-chain/go-fff/core/state"
	"github.com/fff-chain/go-fff/core/state/snapshot"
	"github.com/fff-chain/go-fff/core/types"
	"github.com/fff-chain/go-fff/crypto"
	"github.com/fff-chain/go-fff/ethdb"
	"github.com/fff-chain/go-fff/log"
	"github.com/fff-chain/go-fff/node"
	"github.com/fff-chain/go-fff/rlp"
	"github.com/fff-chain/go-fff/trie"
)

const (
	// stateBloomFilePrefix is the filename prefix of state bloom filter.
	stateBloomFilePrefix = "statebloom"

	// stateBloomFileSuffix is the filename suffix of state bloom filter.
	stateBloomFileSuffix = "bf.gz"

	// stateBloomFileTempSuffix is the filename suffix of state bloom filter
	// while it is being written out to detect write aborts.
	stateBloomFileTempSuffix = ".tmp"

	// rangeCompactionThreshold is the minimal number of deleted entries that
	// triggers range compaction. It's a fairly arbitrary number, chosen to
	// avoid triggering range compaction for small deletions.
	rangeCompactionThreshold = 100000
)

var (
	// emptyRoot is the known root hash of an empty trie.
	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")

	// emptyCode is the known hash of the empty EVM bytecode.
	emptyCode = crypto.Keccak256(nil)
)
// Pruner is an offline tool to prune the stale state with the
// help of the snapshot. The workflow of the pruner is very simple:
//
// - iterate the snapshot, reconstruct the relevant state
// - iterate the database, delete all other state entries which
//   don't belong to the target state and the genesis state
//
// It can take several hours (around 2 hours for mainnet) to finish
// the whole pruning work. It's recommended to run this offline tool
// periodically in order to reclaim disk space and improve disk read
// performance to some extent.
type Pruner struct {
	db            ethdb.Database
	stateBloom    *stateBloom
	datadir       string
	trieCachePath string
	headHeader    *types.Header
	snaptree      *snapshot.Tree
	triesInMemory uint64
}

type BlockPruner struct {
	db                  ethdb.Database
	oldAncientPath      string
	newAncientPath      string
	node                *node.Node
	BlockAmountReserved uint64
}

// NewPruner creates the pruner instance.
func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize, triesInMemory uint64) (*Pruner, error) {
	headBlock := rawdb.ReadHeadBlock(db)
	if headBlock == nil {
		return nil, errors.New("Failed to load head block")
	}
	snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, int(triesInMemory), headBlock.Root(), false, false, false)
	if err != nil {
		return nil, err // The relevant snapshot(s) might not exist
	}
	// Sanitize the bloom filter size if it's too small.
	if bloomSize < 256 {
		log.Warn("Sanitizing bloomfilter size", "provided(MB)", bloomSize, "updated(MB)", 256)
		bloomSize = 256
	}
	stateBloom, err := newStateBloomWithSize(bloomSize)
	if err != nil {
		return nil, err
	}
	return &Pruner{
		db:            db,
		stateBloom:    stateBloom,
		datadir:       datadir,
		trieCachePath: trieCachePath,
		triesInMemory: triesInMemory,
		headHeader:    headBlock.Header(),
		snaptree:      snaptree,
	}, nil
}

// NewBlockPruner creates the block pruner instance.
func NewBlockPruner(db ethdb.Database, n *node.Node, oldAncientPath, newAncientPath string, BlockAmountReserved uint64) *BlockPruner {
	return &BlockPruner{
		db:                  db,
		oldAncientPath:      oldAncientPath,
		newAncientPath:      newAncientPath,
		node:                n,
		BlockAmountReserved: BlockAmountReserved,
	}
}
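// Illustrative usage sketch (not part of the original source): how a caller
// that already holds an open chain database and the node's data directory
// might wire the constructors above together. The variable names (db, datadir,
// trieCachePath) and the parameter values (2048 MB bloom, 128 tries in memory)
// are assumptions for the example only.
//
//	pruner, err := NewPruner(db, datadir, trieCachePath, 2048, 128)
//	if err != nil {
//		return err
//	}
//	// Passing the zero hash lets Prune pick the bottom-most snapshot diff
//	// layer as the pruning target (see Prune further below).
//	return pruner.Prune(common.Hash{})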
func prune(snaptree *snapshot.Tree, root common.Hash, maindb ethdb.Database, stateBloom *stateBloom, bloomPath string, middleStateRoots map[common.Hash]struct{}, start time.Time) error {
	// Delete all stale trie nodes in the disk. With the help of the state bloom
	// the trie nodes (and codes) belonging to the active state will be filtered
	// out. A very small part of the stale tries will also be filtered because of
	// the false-positive rate of the bloom filter. But the assumption is held here
	// that the false-positive rate is low enough (~0.05%). The probability of a
	// dangling node being a state root is extremely low, so in theory the dangling
	// nodes will never be visited again.
	var (
		count  int
		size   common.StorageSize
		pstart = time.Now()
		logged = time.Now()
		batch  = maindb.NewBatch()
		iter   = maindb.NewIterator(nil, nil)
	)
	for iter.Next() {
		key := iter.Key()

		// All state entries that don't belong to the target state or the
		// genesis state are deleted here:
		// - trie node
		// - legacy contract code
		// - new-scheme contract code
		isCode, codeKey := rawdb.IsCodeKey(key)
		if len(key) == common.HashLength || isCode {
			checkKey := key
			if isCode {
				checkKey = codeKey
			}
			if _, exist := middleStateRoots[common.BytesToHash(checkKey)]; exist {
				log.Debug("Forcibly delete the middle state roots", "hash", common.BytesToHash(checkKey))
			} else {
				if ok, err := stateBloom.Contain(checkKey); err != nil {
					return err
				} else if ok {
					continue
				}
			}
			count += 1
			size += common.StorageSize(len(key) + len(iter.Value()))
			batch.Delete(key)

			var eta time.Duration // Realistically this will never remain uninitialized
			if done := binary.BigEndian.Uint64(key[:8]); done > 0 {
				var (
					left  = math.MaxUint64 - binary.BigEndian.Uint64(key[:8])
					speed = done/uint64(time.Since(pstart)/time.Millisecond+1) + 1 // +1 to avoid division by zero
				)
				eta = time.Duration(left/speed) * time.Millisecond
			}
			if time.Since(logged) > 8*time.Second {
				log.Info("Pruning state data", "nodes", count, "size", size,
					"elapsed", common.PrettyDuration(time.Since(pstart)), "eta", common.PrettyDuration(eta))
				logged = time.Now()
			}
			// Recreate the iterator after every batch commit in order
			// to allow the underlying compactor to delete the entries.
			if batch.ValueSize() >= ethdb.IdealBatchSize {
				batch.Write()
				batch.Reset()

				iter.Release()
				iter = maindb.NewIterator(nil, key)
			}
		}
	}
	if batch.ValueSize() > 0 {
		batch.Write()
		batch.Reset()
	}
	iter.Release()
	log.Info("Pruned state data", "nodes", count, "size", size, "elapsed", common.PrettyDuration(time.Since(pstart)))

	// Pruning is done, now drop the "useless" layers from the snapshot.
	// First, flush the target layer to disk. After that, all the diff
	// layers below the target will be merged into the disk layer.
	if root != snaptree.DiskRoot() {
		if err := snaptree.Cap(root, 0); err != nil {
			return err
		}
	}
	// Second, flush the snapshot journal to disk. All diff layers above the
	// target are dropped silently. Eventually the entire snapshot tree is
	// converted into a single disk layer with the pruning target as the root.
	if _, err := snaptree.Journal(root); err != nil {
		return err
	}
	// Delete the state bloom; it marks the entire pruning procedure as
	// finished. If any crash or manual exit happens before this,
	// `RecoverPruning` will pick it up on the next restart to redo all
	// the work.
	os.RemoveAll(bloomPath)

	// Start compactions to remove the deleted data from disk immediately.
	// Note that for small prunings, compaction is skipped.
	if count >= rangeCompactionThreshold {
		cstart := time.Now()
		for b := 0x00; b <= 0xf0; b += 0x10 {
			var (
				start = []byte{byte(b)}
				end   = []byte{byte(b + 0x10)}
			)
			if b == 0xf0 {
				end = nil
			}
			log.Info("Compacting database", "range", fmt.Sprintf("%#x-%#x", start, end), "elapsed", common.PrettyDuration(time.Since(cstart)))
			if err := maindb.Compact(start, end); err != nil {
				log.Error("Database compaction failed", "error", err)
				return err
			}
		}
		log.Info("Database compaction finished", "elapsed", common.PrettyDuration(time.Since(cstart)))
	}
	log.Info("State pruning successful", "pruned", size, "elapsed", common.PrettyDuration(time.Since(start)))
	return nil
}
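// Worked example of the ETA heuristic in prune above (illustrative): the
// iterator walks keys in lexicographic order, so the first 8 bytes of the
// current key, read as a big-endian uint64, approximate how much of the
// keyspace has been covered. If the current key starts with byte 0x80,
// done is roughly half of math.MaxUint64, left is roughly equal to done,
// and therefore eta is roughly the time elapsed so far.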
func (p *BlockPruner) backUpOldDb(name string, cache, handles int, namespace string, readonly, interrupt bool) error {
	// Open old db wrapper.
	chainDb, err := p.node.OpenDatabaseWithFreezer(name, cache, handles, p.oldAncientPath, namespace, readonly, true, interrupt)
	if err != nil {
		log.Error("Failed to open ancient database", "err", err)
		return err
	}
	defer chainDb.Close()
	log.Info("chainDB opened successfully")

	// Get the number of items in the old ancient db.
	itemsOfAncient, err := chainDb.ItemAmountInAncient()
	log.Info("the number of items in ancientDB is", "itemsOfAncient", itemsOfAncient)

	// If we can't access the freezer or it's empty, abort.
	if err != nil || itemsOfAncient == 0 {
		log.Error("can't access the freezer or it's empty, abort")
		return errors.New("can't access the freezer or it's empty, abort")
	}

	// If the number of items in the freezer is less than the amount of blocks we want to reserve, stop.
	if itemsOfAncient < p.BlockAmountReserved {
		log.Error("the number of old blocks is not enough to reserve", "ancient items", itemsOfAncient, "the amount specified", p.BlockAmountReserved)
		return errors.New("the number of old blocks is not enough to reserve")
	}

	var oldOffSet uint64
	if interrupt {
		// The interrupt scenario within this function is specific to the case where the
		// old and new ancientDBs exist concurrently. Use the last version of the offset
		// for the old ancientDB, because the current offset actually belongs to the new
		// ancientDB_Backup, while what we want is the offset of the ancientDB being backed up.
		oldOffSet = rawdb.ReadOffSetOfLastAncientFreezer(chainDb)
	} else {
		// Use the current version of the offset, because the db being backed up is the current ancientDB.
		oldOffSet = rawdb.ReadOffSetOfCurrentAncientFreezer(chainDb)
	}
	log.Info("the oldOffSet is", "oldOffSet", oldOffSet)

	// Get the start BlockNumber for pruning.
	startBlockNumber := oldOffSet + itemsOfAncient - p.BlockAmountReserved
	log.Info("new offset/new startBlockNumber is", "new offset", startBlockNumber)

	// Create the new ancientdb backup and record both the new and the last version of
	// the offset in the kvDB as well. For every round, the new offset actually equals
	// the startBlockNumber in the ancient backup db.
	frdbBack, err := rawdb.NewFreezerDb(chainDb, p.newAncientPath, namespace, readonly, startBlockNumber)
	if err != nil {
		log.Error("Failed to create ancient freezer backup", "err", err)
		return err
	}
	defer frdbBack.Close()

	offsetBatch := chainDb.NewBatch()
	rawdb.WriteOffSetOfCurrentAncientFreezer(offsetBatch, startBlockNumber)
	rawdb.WriteOffSetOfLastAncientFreezer(offsetBatch, oldOffSet)
	if err := offsetBatch.Write(); err != nil {
		log.Crit("Failed to write offset into disk", "err", err)
	}

	// If this flock exists, it's guaranteed that the old/new offsets have been updated
	// and the new ancientDB has been created.
	lock, _, err := fileutil.Flock(filepath.Join(p.newAncientPath, "PRUNEFLOCKBACK"))
	if err != nil {
		log.Error("file lock error", "err", err)
		return err
	}

	log.Info("prune info", "old offset", oldOffSet, "number of items in ancientDB", itemsOfAncient, "amount to reserve", p.BlockAmountReserved)
	log.Info("new offset/new startBlockNumber recorded successfully", "new offset", startBlockNumber)

	start := time.Now()
	// All ancient data from startBlockNumber onwards should be written into the new ancientDB ancient_back.
	for blockNumber := startBlockNumber; blockNumber < itemsOfAncient+oldOffSet; blockNumber++ {
		blockHash := rawdb.ReadCanonicalHash(chainDb, blockNumber)
		block := rawdb.ReadBlock(chainDb, blockHash, blockNumber)
		receipts := rawdb.ReadRawReceipts(chainDb, blockHash, blockNumber)
		// Calculate the total difficulty of the block
		td := rawdb.ReadTd(chainDb, blockHash, blockNumber)
		if td == nil {
			return consensus.ErrUnknownAncestor
		}
		// Write into the new ancient_back db.
		rawdb.WriteAncientBlock(frdbBack, block, receipts, td)
		// Print the log every 5s for better tracing.
		if common.PrettyDuration(time.Since(start)) > common.PrettyDuration(5*time.Second) {
			log.Info("block backup process running successfully", "current blockNumber for backup", blockNumber)
			start = time.Now()
		}
	}
	lock.Release()
	log.Info("block back up done", "current start blockNumber in ancientDB", startBlockNumber)
	return nil
}

// BlockPruneBackUp backs up the ancient data of the old ancient db, i.e. the most
// recent BlockAmountReserved blocks in the ancient db.
func (p *BlockPruner) BlockPruneBackUp(name string, cache, handles int, namespace string, readonly, interrupt bool) error {
	start := time.Now()

	if err := p.backUpOldDb(name, cache, handles, namespace, readonly, interrupt); err != nil {
		return err
	}

	log.Info("Block pruning backup finished successfully", "elapsed", common.PrettyDuration(time.Since(start)))
	return nil
}
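// Worked example of the offset arithmetic in backUpOldDb above (illustrative
// numbers only): with oldOffSet = 0, itemsOfAncient = 1,000,000 and
// BlockAmountReserved = 1,024, startBlockNumber = 0 + 1,000,000 - 1,024 =
// 998,976, so blocks 998,976 through 999,999 are copied into the new
// ancient_back freezer, and 998,976 is recorded in the kvDB as the new offset.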
func (p *BlockPruner) RecoverInterruption(name string, cache, handles int, namespace string, readonly bool) error {
	log.Info("RecoverInterruption for block prune")
	newExist, err := CheckFileExist(p.newAncientPath)
	if err != nil {
		log.Error("newAncientDb path error")
		return err
	}

	if newExist {
		log.Info("New ancientDB_backup existed in interruption scenario")
		flockOfAncientBack, err := CheckFileExist(filepath.Join(p.newAncientPath, "PRUNEFLOCKBACK"))
		if err != nil {
			log.Error("Failed to check flock of ancientDB_Back", "err", err)
			return err
		}

		// Both the old and new ancientDBs exist concurrently.
		// Delete the new ancientdb directly to prune from the start, e.g. path ../chaindb/ancient_backup
		if err := os.RemoveAll(p.newAncientPath); err != nil {
			log.Error("Failed to remove new ancient directory", "err", err)
			return err
		}
		if flockOfAncientBack {
			// The oldOffset/newOffset have already been updated.
			if err := p.BlockPruneBackUp(name, cache, handles, namespace, readonly, true); err != nil {
				log.Error("Failed to prune")
				return err
			}
		} else {
			// The flock did not exist and the new offset was not updated, so just handle this case as usual.
			if err := p.BlockPruneBackUp(name, cache, handles, namespace, readonly, false); err != nil {
				log.Error("Failed to prune")
				return err
			}
		}

		if err := p.AncientDbReplacer(); err != nil {
			log.Error("Failed to replace ancientDB")
			return err
		}
	} else {
		log.Info("New ancientDB_backup did not exist in interruption scenario")
		// The new ancientDB was not even created; just back up starting from startBlockNumber as usual.
		// In this case, the new offset has not been written into the kvDB yet.
		if err := p.BlockPruneBackUp(name, cache, handles, namespace, readonly, false); err != nil {
			log.Error("Failed to prune")
			return err
		}
		if err := p.AncientDbReplacer(); err != nil {
			log.Error("Failed to replace ancientDB")
			return err
		}
	}

	return nil
}

// CheckFileExist reports whether the given path exists.
func CheckFileExist(path string) (bool, error) {
	if _, err := os.Stat(path); err != nil {
		if os.IsNotExist(err) {
			// The file doesn't exist.
			return false, nil
		}
		return true, err
	}
	return true, nil
}

// AncientDbReplacer replaces the old ancient db with the freshly backed up one.
func (p *BlockPruner) AncientDbReplacer() error {
	// Delete the old ancientdb directly, e.g. path ../chaindb/ancient
	if err := os.RemoveAll(p.oldAncientPath); err != nil {
		log.Error("Failed to remove old ancient directory", "err", err)
		return err
	}

	// Rename the new ancientdb path to the old one.
	if err := os.Rename(p.newAncientPath, p.oldAncientPath); err != nil {
		log.Error("Failed to rename new ancient directory")
		return err
	}
	return nil
}
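// Recovery decision summary for RecoverInterruption above (a restatement of
// the code paths, no new behavior):
//
//   - ancient_back dir missing: back up with interrupt=false, then replace.
//   - ancient_back dir present, PRUNEFLOCKBACK flock missing: delete the
//     partial backup and redo with interrupt=false (the offsets in the kvDB
//     are still the current ones).
//   - ancient_back dir present, PRUNEFLOCKBACK flock present: delete the
//     partial backup and redo with interrupt=true (the offsets were already
//     rewritten, so the last-version offset must be used).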
// Prune deletes all historical state nodes except the nodes belonging to the
// specified state version. If the user doesn't specify the state version, the
// bottom-most snapshot diff layer is used as the target.
func (p *Pruner) Prune(root common.Hash) error {
	// If the state bloom filter was already committed previously, reuse it
	// for pruning instead of generating a new one. This is mandatory because
	// a part of the state may already have been deleted, so the recovery
	// procedure is necessary.
	_, stateBloomRoot, err := findBloomFilter(p.datadir)
	if err != nil {
		return err
	}
	if stateBloomRoot != (common.Hash{}) {
		return RecoverPruning(p.datadir, p.db, p.trieCachePath, p.triesInMemory)
	}
	// If the target state root is not specified, use HEAD-(n-1) as the
	// target. The reasons for picking it are:
	// - in most of the normal cases, the related state is available
	// - the probability of this layer being reorged is very low
	var layers []snapshot.Snapshot
	if root == (common.Hash{}) {
		// Retrieve all snapshot layers from the current HEAD.
		// In theory there are n diff layers + 1 disk layer present,
		// so n diff layers are expected to be returned.
		layers = p.snaptree.Snapshots(p.headHeader.Root, int(p.triesInMemory), true)
		if len(layers) != int(p.triesInMemory) {
			// Reject if the number of accumulated diff layers is less than n.
			// In most normal cases this means there is no state associated
			// with the bottom-most diff layer.
			return fmt.Errorf("snapshot not old enough yet: need %d more blocks", int(p.triesInMemory)-len(layers))
		}
		// Use the bottom-most diff layer as the target
		root = layers[len(layers)-1].Root()
	}
	// Ensure the root is really present. The weak assumption
	// is that the presence of the root can indicate the presence
	// of the entire trie.
	if blob := rawdb.ReadTrieNode(p.db, root); len(blob) == 0 {
		// The special case is for clique based networks (rinkeby, goerli
		// and some other private networks), where it's possible that two
		// consecutive blocks have the same root. In this case no snapshot
		// diff layer is created, so the state at HEAD-(n-1) may not be
		// paired with the HEAD-(n-1) layer. Instead the paired layer is
		// higher than the bottom-most diff layer. Try to find the
		// bottom-most snapshot layer with state available.
		//
		// Note HEAD is ignored. Usually its associated state is available,
		// but we don't want to use the topmost state as the pruning target.
		var found bool
		for i := len(layers) - 2; i >= 1; i-- {
			if blob := rawdb.ReadTrieNode(p.db, layers[i].Root()); len(blob) != 0 {
				root = layers[i].Root()
				found = true
				log.Info("Selecting middle-layer as the pruning target", "root", root, "depth", i)
				break
			}
		}
		if !found {
			if blob := rawdb.ReadTrieNode(p.db, p.snaptree.DiskRoot()); len(blob) != 0 {
				root = p.snaptree.DiskRoot()
				found = true
				log.Info("Selecting disk-layer as the pruning target", "root", root)
			}
		}
		if !found {
			if len(layers) > 0 {
				return errors.New("no snapshot paired state")
			}
			return fmt.Errorf("associated state[%x] is not present", root)
		}
	} else {
		if len(layers) > 0 {
			log.Info("Selecting bottom-most difflayer as the pruning target", "root", root, "height", p.headHeader.Number.Uint64()-(p.triesInMemory-1))
		} else {
			log.Info("Selecting user-specified state as the pruning target", "root", root)
		}
	}
	// Before starting the pruning, delete the clean trie cache first.
	// Otherwise, on the next restart we would hit the deleted state root
	// in the "clean cache", and the incomplete state would be picked up
	// for use.
	deleteCleanTrieCache(p.trieCachePath)

	// All the state roots of the middle layers should be forcibly pruned,
	// otherwise dangling state will be left behind.
	middleRoots := make(map[common.Hash]struct{})
	for _, layer := range layers {
		if layer.Root() == root {
			break
		}
		middleRoots[layer.Root()] = struct{}{}
	}
	// Traverse the target state, re-construct the whole state trie and
	// commit it to the given bloom filter.
	start := time.Now()
	if err := snapshot.GenerateTrie(p.snaptree, root, p.db, p.stateBloom); err != nil {
		return err
	}
	// Traverse the genesis and put all genesis state entries into the
	// bloom filter too.
	if err := extractGenesis(p.db, p.stateBloom); err != nil {
		return err
	}
	filterName := bloomFilterName(p.datadir, root)

	log.Info("Writing state bloom to disk", "name", filterName)
	if err := p.stateBloom.Commit(filterName, filterName+stateBloomFileTempSuffix); err != nil {
		return err
	}
	log.Info("State bloom filter committed", "name", filterName)
	return prune(p.snaptree, root, p.db, p.stateBloom, filterName, middleRoots, start)
}
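// Illustrative example of the default target selection in Prune above: with
// triesInMemory = 128 and the chain head at block N, 128 diff layers are
// expected, the bottom-most one corresponds to block N-127, so the state at
// HEAD-127 becomes the pruning target and the roots of the layers for blocks
// N down to N-126 are collected as middle state roots to be forcibly pruned.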
// RecoverPruning will resume the pruning procedure during the system restart.
// This function is used in the following case: the user tries to prune state
// data, but the system was interrupted midway because of a crash or a manual
// kill. In this case, if the bloom filter for filtering active state is
// already constructed, the pruning can be resumed. What's more, if the bloom
// filter is constructed, the pruning **has to be resumed**. Otherwise a lot
// of dangling nodes may be left in the disk.
func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string, triesInMemory uint64) error {
	stateBloomPath, stateBloomRoot, err := findBloomFilter(datadir)
	if err != nil {
		return err
	}
	if stateBloomPath == "" {
		return nil // nothing to recover
	}
	headBlock := rawdb.ReadHeadBlock(db)
	if headBlock == nil {
		return errors.New("Failed to load head block")
	}
	// Initialize the snapshot tree in recovery mode to handle this special case:
	// - The user runs the `prune-state` command multiple times
	// - None of these `prune-state` runs finished (e.g. interrupted manually)
	// - The state bloom filter is already generated, a part of the state is
	//   deleted, so resuming the pruning here is mandatory
	// - The state HEAD is rewound already because of multiple incomplete `prune-state` runs
	// In this case, even if the state HEAD doesn't exactly match the snapshot,
	// it is still feasible to recover the pruning correctly.
	snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, int(triesInMemory), headBlock.Root(), false, false, true)
	if err != nil {
		return err // The relevant snapshot(s) might not exist
	}
	stateBloom, err := NewStateBloomFromDisk(stateBloomPath)
	if err != nil {
		return err
	}
	log.Info("Loaded state bloom filter", "path", stateBloomPath)

	// Before starting the pruning, delete the clean trie cache first.
	// Otherwise, on the next restart we would hit the deleted state root
	// in the "clean cache", and the incomplete state would be picked up
	// for use.
	deleteCleanTrieCache(trieCachePath)

	// All the state roots of the middle layers should be forcibly pruned,
	// otherwise dangling state will be left behind.
	var (
		found       bool
		layers      = snaptree.Snapshots(headBlock.Root(), int(triesInMemory), true)
		middleRoots = make(map[common.Hash]struct{})
	)
	for _, layer := range layers {
		if layer.Root() == stateBloomRoot {
			found = true
			break
		}
		middleRoots[layer.Root()] = struct{}{}
	}
	if !found {
		log.Error("Pruning target state is not existent")
		return errors.New("non-existent target state")
	}
	return prune(snaptree, stateBloomRoot, db, stateBloom, stateBloomPath, middleRoots, time.Now())
}
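// Illustrative startup sketch (not part of the original source): a node that
// supports offline state pruning would typically invoke RecoverPruning before
// resuming normal operation, so that an interrupted run is completed first.
// The surrounding variables are assumptions for the example only.
//
//	if err := pruner.RecoverPruning(datadir, db, trieCachePath, triesInMemory); err != nil {
//		log.Error("Failed to recover interrupted state pruning", "err", err)
//		return err
//	}
//
// If no statebloom file is found under datadir, the call is a no-op.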
// extractGenesis loads the genesis state and commits all the state entries
// into the given bloom filter.
func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
	genesisHash := rawdb.ReadCanonicalHash(db, 0)
	if genesisHash == (common.Hash{}) {
		return errors.New("missing genesis hash")
	}
	genesis := rawdb.ReadBlock(db, genesisHash, 0)
	if genesis == nil {
		return errors.New("missing genesis block")
	}
	t, err := trie.NewSecure(genesis.Root(), trie.NewDatabase(db))
	if err != nil {
		return err
	}
	accIter := t.NodeIterator(nil)
	for accIter.Next(true) {
		hash := accIter.Hash()

		// Embedded nodes don't have a hash.
		if hash != (common.Hash{}) {
			stateBloom.Put(hash.Bytes(), nil)
		}
		// If it's a leaf node, we are touching an account;
		// dig into its storage trie further.
		if accIter.Leaf() {
			var acc state.Account
			if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil {
				return err
			}
			if acc.Root != emptyRoot {
				storageTrie, err := trie.NewSecure(acc.Root, trie.NewDatabase(db))
				if err != nil {
					return err
				}
				storageIter := storageTrie.NodeIterator(nil)
				for storageIter.Next(true) {
					hash := storageIter.Hash()
					if hash != (common.Hash{}) {
						stateBloom.Put(hash.Bytes(), nil)
					}
				}
				if storageIter.Error() != nil {
					return storageIter.Error()
				}
			}
			if !bytes.Equal(acc.CodeHash, emptyCode) {
				stateBloom.Put(acc.CodeHash, nil)
			}
		}
	}
	return accIter.Error()
}
// bloomFilterName returns the on-disk filename of the state bloom filter for
// the given state root.
func bloomFilterName(datadir string, hash common.Hash) string {
	return filepath.Join(datadir, fmt.Sprintf("%s.%s.%s", stateBloomFilePrefix, hash.Hex(), stateBloomFileSuffix))
}

// isBloomFilter reports whether the given file is a state bloom filter and,
// if so, returns the state root it was built for.
func isBloomFilter(filename string) (bool, common.Hash) {
	filename = filepath.Base(filename)
	if strings.HasPrefix(filename, stateBloomFilePrefix) && strings.HasSuffix(filename, stateBloomFileSuffix) {
		return true, common.HexToHash(filename[len(stateBloomFilePrefix)+1 : len(filename)-len(stateBloomFileSuffix)-1])
	}
	return false, common.Hash{}
}

// findBloomFilter walks the data directory and returns the path and state root
// of a state bloom filter found there, if one exists.
func findBloomFilter(datadir string) (string, common.Hash, error) {
	var (
		stateBloomPath string
		stateBloomRoot common.Hash
	)
	if err := filepath.Walk(datadir, func(path string, info os.FileInfo, err error) error {
		if info != nil && !info.IsDir() {
			ok, root := isBloomFilter(path)
			if ok {
				stateBloomPath = path
				stateBloomRoot = root
			}
		}
		return nil
	}); err != nil {
		return "", common.Hash{}, err
	}
	return stateBloomPath, stateBloomRoot, nil
}

const warningLog = `

WARNING!

The clean trie cache is not found. Please delete it by yourself after the
pruning. Remember not to start Geth without deleting the clean trie cache,
otherwise the entire database may be damaged!

Check the command description "geth snapshot prune-state --help" for more details.
`

func deleteCleanTrieCache(path string) {
	if _, err := os.Stat(path); os.IsNotExist(err) {
		log.Warn(warningLog)
		return
	}
	os.RemoveAll(path)
	log.Info("Deleted trie clean cache", "path", path)
}
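// Example of the bloom filter naming scheme above, using the empty-trie root
// declared at the top of this file purely for illustration:
//
//	bloomFilterName("/data", emptyRoot)
//	// => /data/statebloom.0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421.bf.gz
//
// isBloomFilter recognizes such a filename and parses the root hash back out
// of it, which is how findBloomFilter locates an in-progress pruning run after
// a restart.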