github.com/ethereum/go-ethereum@v1.16.1/triedb/pathdb/history_indexer.go (about) 1 // Copyright 2025 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/ 16 17 package pathdb 18 19 import ( 20 "errors" 21 "fmt" 22 "runtime" 23 "sync" 24 "sync/atomic" 25 "time" 26 27 "github.com/ethereum/go-ethereum/common" 28 "github.com/ethereum/go-ethereum/core/rawdb" 29 "github.com/ethereum/go-ethereum/crypto" 30 "github.com/ethereum/go-ethereum/ethdb" 31 "github.com/ethereum/go-ethereum/log" 32 "github.com/ethereum/go-ethereum/rlp" 33 "golang.org/x/sync/errgroup" 34 ) 35 36 const ( 37 // The batch size for reading state histories 38 historyReadBatch = 1000 39 40 stateIndexV0 = uint8(0) // initial version of state index structure 41 stateIndexVersion = stateIndexV0 // the current state index version 42 ) 43 44 type indexMetadata struct { 45 Version uint8 46 Last uint64 47 } 48 49 func loadIndexMetadata(db ethdb.KeyValueReader) *indexMetadata { 50 blob := rawdb.ReadStateHistoryIndexMetadata(db) 51 if len(blob) == 0 { 52 return nil 53 } 54 var m indexMetadata 55 if err := rlp.DecodeBytes(blob, &m); err != nil { 56 log.Error("Failed to decode index metadata", "err", err) 57 return nil 58 } 59 return &m 60 } 61 62 func storeIndexMetadata(db 
ethdb.KeyValueWriter, last uint64) { 63 var m indexMetadata 64 m.Version = stateIndexVersion 65 m.Last = last 66 blob, err := rlp.EncodeToBytes(m) 67 if err != nil { 68 log.Crit("Failed to encode index metadata", "err", err) 69 } 70 rawdb.WriteStateHistoryIndexMetadata(db, blob) 71 } 72 73 // batchIndexer is a structure designed to perform batch indexing or unindexing 74 // of state histories atomically. 75 type batchIndexer struct { 76 accounts map[common.Hash][]uint64 // History ID list, Keyed by the hash of account address 77 storages map[common.Hash]map[common.Hash][]uint64 // History ID list, Keyed by the hash of account address and the hash of raw storage key 78 counter int // The counter of processed states 79 delete bool // Index or unindex mode 80 lastID uint64 // The ID of latest processed history 81 db ethdb.KeyValueStore 82 } 83 84 // newBatchIndexer constructs the batch indexer with the supplied mode. 85 func newBatchIndexer(db ethdb.KeyValueStore, delete bool) *batchIndexer { 86 return &batchIndexer{ 87 accounts: make(map[common.Hash][]uint64), 88 storages: make(map[common.Hash]map[common.Hash][]uint64), 89 delete: delete, 90 db: db, 91 } 92 } 93 94 // process iterates through the accounts and their associated storage slots in the 95 // state history, tracking the mapping between state and history IDs. 
96 func (b *batchIndexer) process(h *history, historyID uint64) error { 97 for _, address := range h.accountList { 98 addrHash := crypto.Keccak256Hash(address.Bytes()) 99 b.counter += 1 100 b.accounts[addrHash] = append(b.accounts[addrHash], historyID) 101 102 for _, slotKey := range h.storageList[address] { 103 b.counter += 1 104 if _, ok := b.storages[addrHash]; !ok { 105 b.storages[addrHash] = make(map[common.Hash][]uint64) 106 } 107 // The hash of the storage slot key is used as the identifier because the 108 // legacy history does not include the raw storage key, therefore, the 109 // conversion from storage key to hash is necessary for non-v0 histories. 110 slotHash := slotKey 111 if h.meta.version != stateHistoryV0 { 112 slotHash = crypto.Keccak256Hash(slotKey.Bytes()) 113 } 114 b.storages[addrHash][slotHash] = append(b.storages[addrHash][slotHash], historyID) 115 } 116 } 117 b.lastID = historyID 118 return b.finish(false) 119 } 120 121 // finish writes the accumulated state indexes into the disk if either the 122 // memory limitation is reached or it's requested forcibly. 
123 func (b *batchIndexer) finish(force bool) error { 124 if b.counter == 0 { 125 return nil 126 } 127 if !force && b.counter < historyIndexBatch { 128 return nil 129 } 130 var ( 131 batch = b.db.NewBatch() 132 batchMu sync.RWMutex 133 storages int 134 start = time.Now() 135 eg errgroup.Group 136 ) 137 eg.SetLimit(runtime.NumCPU()) 138 139 for addrHash, idList := range b.accounts { 140 eg.Go(func() error { 141 if !b.delete { 142 iw, err := newIndexWriter(b.db, newAccountIdent(addrHash)) 143 if err != nil { 144 return err 145 } 146 for _, n := range idList { 147 if err := iw.append(n); err != nil { 148 return err 149 } 150 } 151 batchMu.Lock() 152 iw.finish(batch) 153 batchMu.Unlock() 154 } else { 155 id, err := newIndexDeleter(b.db, newAccountIdent(addrHash)) 156 if err != nil { 157 return err 158 } 159 for _, n := range idList { 160 if err := id.pop(n); err != nil { 161 return err 162 } 163 } 164 batchMu.Lock() 165 id.finish(batch) 166 batchMu.Unlock() 167 } 168 return nil 169 }) 170 } 171 for addrHash, slots := range b.storages { 172 storages += len(slots) 173 for storageHash, idList := range slots { 174 eg.Go(func() error { 175 if !b.delete { 176 iw, err := newIndexWriter(b.db, newStorageIdent(addrHash, storageHash)) 177 if err != nil { 178 return err 179 } 180 for _, n := range idList { 181 if err := iw.append(n); err != nil { 182 return err 183 } 184 } 185 batchMu.Lock() 186 iw.finish(batch) 187 batchMu.Unlock() 188 } else { 189 id, err := newIndexDeleter(b.db, newStorageIdent(addrHash, storageHash)) 190 if err != nil { 191 return err 192 } 193 for _, n := range idList { 194 if err := id.pop(n); err != nil { 195 return err 196 } 197 } 198 batchMu.Lock() 199 id.finish(batch) 200 batchMu.Unlock() 201 } 202 return nil 203 }) 204 } 205 } 206 if err := eg.Wait(); err != nil { 207 return err 208 } 209 // Update the position of last indexed state history 210 if !b.delete { 211 storeIndexMetadata(batch, b.lastID) 212 } else { 213 if b.lastID == 1 { 214 
rawdb.DeleteStateHistoryIndexMetadata(batch) 215 } else { 216 storeIndexMetadata(batch, b.lastID-1) 217 } 218 } 219 if err := batch.Write(); err != nil { 220 return err 221 } 222 log.Debug("Committed batch indexer", "accounts", len(b.accounts), "storages", storages, "records", b.counter, "elapsed", common.PrettyDuration(time.Since(start))) 223 b.counter = 0 224 b.accounts = make(map[common.Hash][]uint64) 225 b.storages = make(map[common.Hash]map[common.Hash][]uint64) 226 return nil 227 } 228 229 // indexSingle processes the state history with the specified ID for indexing. 230 func indexSingle(historyID uint64, db ethdb.KeyValueStore, freezer ethdb.AncientReader) error { 231 start := time.Now() 232 defer func() { 233 indexHistoryTimer.UpdateSince(start) 234 }() 235 236 metadata := loadIndexMetadata(db) 237 if metadata == nil || metadata.Last+1 != historyID { 238 last := "null" 239 if metadata != nil { 240 last = fmt.Sprintf("%v", metadata.Last) 241 } 242 return fmt.Errorf("history indexing is out of order, last: %s, requested: %d", last, historyID) 243 } 244 h, err := readHistory(freezer, historyID) 245 if err != nil { 246 return err 247 } 248 b := newBatchIndexer(db, false) 249 if err := b.process(h, historyID); err != nil { 250 return err 251 } 252 if err := b.finish(true); err != nil { 253 return err 254 } 255 log.Debug("Indexed state history", "id", historyID, "elapsed", common.PrettyDuration(time.Since(start))) 256 return nil 257 } 258 259 // unindexSingle processes the state history with the specified ID for unindexing. 
260 func unindexSingle(historyID uint64, db ethdb.KeyValueStore, freezer ethdb.AncientReader) error { 261 start := time.Now() 262 defer func() { 263 unindexHistoryTimer.UpdateSince(start) 264 }() 265 266 metadata := loadIndexMetadata(db) 267 if metadata == nil || metadata.Last != historyID { 268 last := "null" 269 if metadata != nil { 270 last = fmt.Sprintf("%v", metadata.Last) 271 } 272 return fmt.Errorf("history unindexing is out of order, last: %s, requested: %d", last, historyID) 273 } 274 h, err := readHistory(freezer, historyID) 275 if err != nil { 276 return err 277 } 278 b := newBatchIndexer(db, true) 279 if err := b.process(h, historyID); err != nil { 280 return err 281 } 282 if err := b.finish(true); err != nil { 283 return err 284 } 285 log.Debug("Unindexed state history", "id", historyID, "elapsed", common.PrettyDuration(time.Since(start))) 286 return nil 287 } 288 289 type interruptSignal struct { 290 newLastID uint64 291 result chan error 292 } 293 294 // indexIniter is responsible for completing the indexing of remaining state 295 // histories in batch. It runs as a one-time background thread and terminates 296 // once all available state histories are indexed. 297 // 298 // Afterward, new state histories should be indexed synchronously alongside 299 // the state data itself, ensuring both the history and its index are available. 300 // If a state history is removed due to a rollback, the associated indexes should 301 // be unmarked accordingly. 
type indexIniter struct {
	disk      ethdb.KeyValueStore
	freezer   ethdb.AncientStore
	interrupt chan *interruptSignal // channel for extending/shortening the indexing target
	done      chan struct{}         // closed once all requested histories are indexed
	closed    chan struct{}         // closed when the initer is shut down

	// indexing progress
	indexed atomic.Uint64 // the id of latest indexed state
	last    atomic.Uint64 // the id of the target state to be indexed

	wg sync.WaitGroup
}

// newIndexIniter constructs the initializer, restores any previously persisted
// indexing progress from disk and launches the background indexing thread
// targeting the state history with the supplied lastID.
func newIndexIniter(disk ethdb.KeyValueStore, freezer ethdb.AncientStore, lastID uint64) *indexIniter {
	initer := &indexIniter{
		disk:      disk,
		freezer:   freezer,
		interrupt: make(chan *interruptSignal),
		done:      make(chan struct{}),
		closed:    make(chan struct{}),
	}
	// Load indexing progress
	initer.last.Store(lastID)
	metadata := loadIndexMetadata(disk)
	if metadata != nil {
		initer.indexed.Store(metadata.Last)
	}

	// Launch background indexer
	initer.wg.Add(1)
	go initer.run(lastID)
	return initer
}

// close terminates the background indexing thread and blocks until it exits.
// NOTE(review): two concurrent close() calls could both reach the default
// branch and double-close the channel — presumably close is invoked once by
// the owner; confirm against callers.
func (i *indexIniter) close() {
	select {
	case <-i.closed:
		return
	default:
		close(i.closed)
		i.wg.Wait()
	}
}

// inited reports whether all requested state histories have been fully
// indexed, i.e. whether the done channel has been closed by the runner.
func (i *indexIniter) inited() bool {
	select {
	case <-i.closed:
		return false
	case <-i.done:
		return true
	default:
		return false
	}
}

// remain returns the number of state histories which are not yet indexed.
// Zero is returned when the initer is closed or initialization is complete.
func (i *indexIniter) remain() uint64 {
	select {
	case <-i.closed:
		return 0
	case <-i.done:
		return 0
	default:
		last, indexed := i.last.Load(), i.indexed.Load()
		if last < indexed {
			log.Error("Invalid state indexing range", "last", last, "indexed", indexed)
			return 0
		}
		return last - indexed
	}
}

// run is the event loop of the initializer. It supervises the background
// indexing thread, adjusts the indexing target upon interrupt signals and
// closes the done channel once everything up to lastID is indexed.
func (i *indexIniter) run(lastID uint64) {
	defer i.wg.Done()

	// Launch background indexing thread
	var (
		done      = make(chan struct{})
		interrupt = new(atomic.Int32)

		// checkDone indicates whether all requested state histories
		// have been fully indexed.
		checkDone = func() bool {
			metadata := loadIndexMetadata(i.disk)
			return metadata != nil && metadata.Last == lastID
		}
	)
	go i.index(done, interrupt, lastID)

	for {
		select {
		case signal := <-i.interrupt:
			// The indexing limit can only be extended or shortened continuously,
			// one history at a time.
			if signal.newLastID != lastID+1 && signal.newLastID != lastID-1 {
				signal.result <- fmt.Errorf("invalid history id, last: %d, got: %d", lastID, signal.newLastID)
				continue
			}
			i.last.Store(signal.newLastID) // update indexing range

			// The index limit is extended by one, update the limit without
			// interrupting the current background process.
			if signal.newLastID == lastID+1 {
				lastID = signal.newLastID
				signal.result <- nil
				log.Debug("Extended state history range", "last", lastID)
				continue
			}
			// The index limit is shortened by one, interrupt the current background
			// process and relaunch with new target.
			interrupt.Store(1)
			<-done

			// If all state histories, including the one to be reverted, have
			// been fully indexed, unindex it here and shut down the initializer.
			if checkDone() {
				log.Info("Truncate the extra history", "id", lastID)
				if err := unindexSingle(lastID, i.disk, i.freezer); err != nil {
					signal.result <- err
					return
				}
				close(i.done)
				signal.result <- nil
				log.Info("State histories have been fully indexed", "last", lastID-1)
				return
			}
			// Adjust the indexing target and relaunch the process
			lastID = signal.newLastID
			done, interrupt = make(chan struct{}), new(atomic.Int32)
			go i.index(done, interrupt, lastID)
			log.Debug("Shortened state history range", "last", lastID)

		case <-done:
			// The background runner finished; either everything is indexed
			// or more histories remain (e.g. the target was extended).
			if checkDone() {
				close(i.done)
				log.Info("State histories have been fully indexed", "last", lastID)
				return
			}
			// Relaunch the background runner if some tasks are left
			done, interrupt = make(chan struct{}), new(atomic.Int32)
			go i.index(done, interrupt, lastID)

		case <-i.closed:
			interrupt.Store(1)
			log.Info("Waiting background history index initer to exit")
			<-done

			// Mark the initialization as done if it happened to finish
			// right before shutdown.
			if checkDone() {
				close(i.done)
			}
			return
		}
	}
}

// next returns the ID of the next state history to be indexed.
func (i *indexIniter) next() (uint64, error) {
	tail, err := i.freezer.Tail()
	if err != nil {
		return 0, err
	}
	tailID := tail + 1 // compute the id of the oldest history

	// Start indexing from scratch if nothing has been indexed
	metadata := loadIndexMetadata(i.disk)
	if metadata == nil {
		log.Debug("Initialize state history indexing from scratch", "id", tailID)
		return tailID, nil
	}
	// Resume indexing from the last interrupted position
	if metadata.Last+1 >= tailID {
		log.Debug("Resume state history indexing", "id", metadata.Last+1, "tail", tailID)
		return metadata.Last + 1, nil
	}
	// History has been shortened without indexing. Discard the gapped segment
	// in the history and shift to the first available element.
	//
	// The missing indexes corresponding to the gapped histories won't be visible.
	// It's fine to leave them unindexed.
	log.Info("History gap detected, discard old segment", "oldHead", metadata.Last, "newHead", tailID)
	return tailID, nil
}

// index performs the batch indexing from the next unindexed state history
// up to lastID, closing the done channel on return. The process aborts as
// soon as the supplied interrupt flag is set to a non-zero value.
func (i *indexIniter) index(done chan struct{}, interrupt *atomic.Int32, lastID uint64) {
	defer close(done)

	beginID, err := i.next()
	if err != nil {
		log.Error("Failed to find next state history for indexing", "err", err)
		return
	}
	// All available state histories have been indexed, and the last indexed one
	// exceeds the most recent available state history. This situation may occur
	// when the state is reverted manually (chain.SetHead) or the deep reorg is
	// encountered. In such cases, no indexing should be scheduled.
	if beginID > lastID {
		if lastID == 0 && beginID == 1 {
			// Initialize the indexing flag if the state history is empty by
			// using zero as the disk layer ID. This is a common case that
			// can occur after snap sync.
			//
			// This step is essential to avoid spinning up indexing thread
			// endlessly until a history object is produced.
			storeIndexMetadata(i.disk, 0)
			log.Info("Initialized history indexing flag")
		} else {
			log.Debug("State history is fully indexed", "last", lastID)
		}
		return
	}
	log.Info("Start history indexing", "beginID", beginID, "lastID", lastID)

	var (
		current = beginID
		start   = time.Now()
		logged  = time.Now()
		batch   = newBatchIndexer(i.disk, false)
	)
	for current <= lastID {
		count := lastID - current + 1
		if count > historyReadBatch {
			count = historyReadBatch
		}
		histories, err := readHistories(i.freezer, current, count)
		if err != nil {
			// The history read might fail if the history is truncated from
			// head due to revert operation.
			log.Error("Failed to read history for indexing", "current", current, "count", count, "err", err)
			return
		}
		for _, h := range histories {
			if err := batch.process(h, current); err != nil {
				log.Error("Failed to index history", "err", err)
				return
			}
			current += 1

			// Occasionally report the indexing progress
			if time.Since(logged) > time.Second*8 {
				logged = time.Now()

				var (
					left = lastID - current + 1
					// note: this local deliberately shadows the done channel parameter
					done  = current - beginID
					speed = done/uint64(time.Since(start)/time.Millisecond+1) + 1 // +1 in the divisor avoids division by zero; outer +1 keeps speed non-zero for the ETA below
				)
				// Estimate the remaining time from the average speed so far
				eta := time.Duration(left/speed) * time.Millisecond
				log.Info("Indexing state history", "processed", done, "left", left, "elapsed", common.PrettyDuration(time.Since(start)), "eta", common.PrettyDuration(eta))
			}
		}
		i.indexed.Store(current - 1) // update indexing progress

		// Check interruption signal and abort process if it's fired
		if interrupt != nil {
			if signal := interrupt.Load(); signal != 0 {
				// Flush everything accumulated so far before aborting, so
				// progress is not lost.
				if err := batch.finish(true); err != nil {
					log.Error("Failed to flush index", "err", err)
				}
				log.Info("State indexing interrupted")
				return
			}
		}
	}
	if err := batch.finish(true); err != nil {
		log.Error("Failed to flush index", "err", err)
	}
	log.Info("Indexed state history", "from", beginID, "to", lastID, "elapsed", common.PrettyDuration(time.Since(start)))
}

// historyIndexer manages the indexing and unindexing of state histories,
// providing access to historical states.
//
// Upon initialization, historyIndexer starts a one-time background process
// to complete the indexing of any remaining state histories. Once this
// process is finished, all state histories are marked as fully indexed,
// enabling handling of requests for historical states.
Thereafter, any new 578 // state histories must be indexed or unindexed synchronously, ensuring that 579 // the history index is created or removed along with the corresponding 580 // state history. 581 type historyIndexer struct { 582 initer *indexIniter 583 disk ethdb.KeyValueStore 584 freezer ethdb.AncientStore 585 } 586 587 // checkVersion checks whether the index data in the database matches the version. 588 func checkVersion(disk ethdb.KeyValueStore) { 589 blob := rawdb.ReadStateHistoryIndexMetadata(disk) 590 if len(blob) == 0 { 591 return 592 } 593 var m indexMetadata 594 err := rlp.DecodeBytes(blob, &m) 595 if err == nil && m.Version == stateIndexVersion { 596 return 597 } 598 // TODO(rjl493456442) would be better to group them into a batch. 599 rawdb.DeleteStateHistoryIndexMetadata(disk) 600 rawdb.DeleteStateHistoryIndex(disk) 601 602 version := "unknown" 603 if err == nil { 604 version = fmt.Sprintf("%d", m.Version) 605 } 606 log.Info("Cleaned up obsolete state history index", "version", version, "want", stateIndexVersion) 607 } 608 609 // newHistoryIndexer constructs the history indexer and launches the background 610 // initer to complete the indexing of any remaining state histories. 611 func newHistoryIndexer(disk ethdb.KeyValueStore, freezer ethdb.AncientStore, lastHistoryID uint64) *historyIndexer { 612 checkVersion(disk) 613 return &historyIndexer{ 614 initer: newIndexIniter(disk, freezer, lastHistoryID), 615 disk: disk, 616 freezer: freezer, 617 } 618 } 619 620 func (i *historyIndexer) close() { 621 i.initer.close() 622 } 623 624 // inited returns a flag indicating whether the existing state histories 625 // have been fully indexed, in other words, whether they are available 626 // for external access. 627 func (i *historyIndexer) inited() bool { 628 return i.initer.inited() 629 } 630 631 // extend sends the notification that new state history with specified ID 632 // has been written into the database and is ready for indexing. 
633 func (i *historyIndexer) extend(historyID uint64) error { 634 signal := &interruptSignal{ 635 newLastID: historyID, 636 result: make(chan error, 1), 637 } 638 select { 639 case <-i.initer.closed: 640 return errors.New("indexer is closed") 641 case <-i.initer.done: 642 return indexSingle(historyID, i.disk, i.freezer) 643 case i.initer.interrupt <- signal: 644 return <-signal.result 645 } 646 } 647 648 // shorten sends the notification that state history with specified ID 649 // is about to be deleted from the database and should be unindexed. 650 func (i *historyIndexer) shorten(historyID uint64) error { 651 signal := &interruptSignal{ 652 newLastID: historyID - 1, 653 result: make(chan error, 1), 654 } 655 select { 656 case <-i.initer.closed: 657 return errors.New("indexer is closed") 658 case <-i.initer.done: 659 return unindexSingle(historyID, i.disk, i.freezer) 660 case i.initer.interrupt <- signal: 661 return <-signal.result 662 } 663 } 664 665 // progress returns the indexing progress made so far. It provides the number 666 // of states that remain unindexed. 667 func (i *historyIndexer) progress() (uint64, error) { 668 select { 669 case <-i.initer.closed: 670 return 0, errors.New("indexer is closed") 671 default: 672 return i.initer.remain(), nil 673 } 674 }