github.com/ethereum/go-ethereum@v1.16.1/core/rawdb/chain_iterator.go (about) 1 // Copyright 2020 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package rawdb 18 19 import ( 20 "encoding/binary" 21 "runtime" 22 "sync/atomic" 23 "time" 24 25 "github.com/ethereum/go-ethereum/common" 26 "github.com/ethereum/go-ethereum/common/prque" 27 "github.com/ethereum/go-ethereum/core/types" 28 "github.com/ethereum/go-ethereum/ethdb" 29 "github.com/ethereum/go-ethereum/log" 30 "github.com/ethereum/go-ethereum/rlp" 31 ) 32 33 // InitDatabaseFromFreezer reinitializes an empty database from a previous batch 34 // of frozen ancient blocks. The method iterates over all the frozen blocks and 35 // injects into the database the block hash->number mappings. 36 func InitDatabaseFromFreezer(db ethdb.Database) { 37 // If we can't access the freezer or it's empty, abort 38 frozen, err := db.Ancients() 39 if err != nil || frozen == 0 { 40 return 41 } 42 var ( 43 batch = db.NewBatch() 44 start = time.Now() 45 logged = start.Add(-7 * time.Second) // Unindex during import is fast, don't double log 46 hash common.Hash 47 ) 48 for i := uint64(0); i < frozen; { 49 // We read 100K hashes at a time, for a total of 3.2M 50 count := uint64(100_000) 51 if i+count > frozen { 52 count = frozen - i 53 } 54 data, err := db.AncientRange(ChainFreezerHashTable, i, count, 32*count) 55 if err != nil { 56 log.Crit("Failed to init database from freezer", "err", err) 57 } 58 for j, h := range data { 59 number := i + uint64(j) 60 hash = common.BytesToHash(h) 61 WriteHeaderNumber(batch, hash, number) 62 // If enough data was accumulated in memory or we're at the last block, dump to disk 63 if batch.ValueSize() > ethdb.IdealBatchSize { 64 if err := batch.Write(); err != nil { 65 log.Crit("Failed to write data to db", "err", err) 66 } 67 batch.Reset() 68 } 69 } 70 i += uint64(len(data)) 71 // If we've spent too much time already, notify the user of what we're doing 72 if time.Since(logged) > 8*time.Second { 73 log.Info("Initializing database from freezer", "total", frozen, "number", i, "hash", hash, "elapsed", common.PrettyDuration(time.Since(start))) 74 logged = time.Now() 75 } 76 } 77 if err := batch.Write(); err != nil { 78 log.Crit("Failed to write data to db", "err", err) 79 } 80 batch.Reset() 81 82 WriteHeadHeaderHash(db, hash) 83 WriteHeadFastBlockHash(db, hash) 84 log.Info("Initialized database from freezer", "blocks", frozen, "elapsed", common.PrettyDuration(time.Since(start))) 85 } 86 87 type blockTxHashes struct { 88 number uint64 89 hashes []common.Hash 90 } 91 92 // iterateTransactions iterates over all transactions in the (canon) block 93 // number(s) given, and yields the hashes on a channel. If there is a signal 94 // received from interrupt channel, the iteration will be aborted and result 95 // channel will be closed. 96 func iterateTransactions(db ethdb.Database, from uint64, to uint64, reverse bool, interrupt chan struct{}) chan *blockTxHashes { 97 // One thread sequentially reads data from db 98 type numberRlp struct { 99 number uint64 100 rlp rlp.RawValue 101 } 102 if to == from { 103 return nil 104 } 105 threads := to - from 106 if cpus := runtime.NumCPU(); threads > uint64(cpus) { 107 threads = uint64(cpus) 108 } 109 var ( 110 rlpCh = make(chan *numberRlp, threads*2) // we send raw rlp over this channel 111 hashesCh = make(chan *blockTxHashes, threads*2) // send hashes over hashesCh 112 ) 113 // lookup runs in one instance 114 lookup := func() { 115 n, end := from, to 116 if reverse { 117 n, end = to-1, from-1 118 } 119 defer close(rlpCh) 120 for n != end { 121 data := ReadCanonicalBodyRLP(db, n, nil) 122 // Feed the block to the aggregator, or abort on interrupt 123 select { 124 case rlpCh <- &numberRlp{n, data}: 125 case <-interrupt: 126 return 127 } 128 if reverse { 129 n-- 130 } else { 131 n++ 132 } 133 } 134 } 135 // process runs in parallel 136 var nThreadsAlive atomic.Int32 137 nThreadsAlive.Store(int32(threads)) 138 process := func() { 139 defer func() { 140 // Last processor closes the result channel 141 if nThreadsAlive.Add(-1) == 0 { 142 close(hashesCh) 143 } 144 }() 145 for data := range rlpCh { 146 var body types.Body 147 if err := rlp.DecodeBytes(data.rlp, &body); err != nil { 148 log.Warn("Failed to decode block body", "block", data.number, "error", err) 149 return 150 } 151 var hashes []common.Hash 152 for _, tx := range body.Transactions { 153 hashes = append(hashes, tx.Hash()) 154 } 155 result := &blockTxHashes{ 156 hashes: hashes, 157 number: data.number, 158 } 159 // Feed the block to the aggregator, or abort on interrupt 160 select { 161 case hashesCh <- result: 162 case <-interrupt: 163 return 164 } 165 } 166 } 167 go lookup() // start the sequential db accessor 168 for i := 0; i < int(threads); i++ { 169 go process() 170 } 171 return hashesCh 172 } 173 174 // indexTransactions creates txlookup indices of the specified block range. 175 // 176 // This function iterates canonical chain in reverse order, it has one main advantage: 177 // We can write tx index tail flag periodically even without the whole indexing 178 // procedure is finished. So that we can resume indexing procedure next time quickly. 179 // 180 // There is a passed channel, the whole procedure will be interrupted if any 181 // signal received. 182 func indexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool, report bool) { 183 // short circuit for invalid range 184 if from >= to { 185 return 186 } 187 var ( 188 hashesCh = iterateTransactions(db, from, to, true, interrupt) 189 batch = db.NewBatch() 190 start = time.Now() 191 logged = start.Add(-7 * time.Second) 192 193 // Since we iterate in reverse, we expect the first number to come 194 // in to be [to-1]. Therefore, setting lastNum to means that the 195 // queue gap-evaluation will work correctly 196 lastNum = to 197 queue = prque.New[int64, *blockTxHashes](nil) 198 blocks, txs = 0, 0 // for stats reporting 199 ) 200 for chanDelivery := range hashesCh { 201 // Push the delivery into the queue and process contiguous ranges. 202 // Since we iterate in reverse, so lower numbers have lower prio, and 203 // we can use the number directly as prio marker 204 queue.Push(chanDelivery, int64(chanDelivery.number)) 205 for !queue.Empty() { 206 // If the next available item is gapped, return 207 if _, priority := queue.Peek(); priority != int64(lastNum-1) { 208 break 209 } 210 // For testing 211 if hook != nil && !hook(lastNum-1) { 212 break 213 } 214 // Next block available, pop it off and index it 215 delivery := queue.PopItem() 216 lastNum = delivery.number 217 WriteTxLookupEntries(batch, delivery.number, delivery.hashes) 218 blocks++ 219 txs += len(delivery.hashes) 220 // If enough data was accumulated in memory or we're at the last block, dump to disk 221 if batch.ValueSize() > ethdb.IdealBatchSize { 222 WriteTxIndexTail(batch, lastNum) // Also write the tail here 223 if err := batch.Write(); err != nil { 224 log.Crit("Failed writing batch to db", "error", err) 225 return 226 } 227 batch.Reset() 228 } 229 // If we've spent too much time already, notify the user of what we're doing 230 if time.Since(logged) > 8*time.Second { 231 log.Info("Indexing transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start))) 232 logged = time.Now() 233 } 234 } 235 } 236 // Flush the new indexing tail and the last committed data. It can also happen 237 // that the last batch is empty because nothing to index, but the tail has to 238 // be flushed anyway. 239 WriteTxIndexTail(batch, lastNum) 240 if err := batch.Write(); err != nil { 241 log.Crit("Failed writing batch to db", "error", err) 242 return 243 } 244 logger := log.Debug 245 if report { 246 logger = log.Info 247 } 248 select { 249 case <-interrupt: 250 logger("Transaction indexing interrupted", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start))) 251 default: 252 logger("Indexed transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start))) 253 } 254 } 255 256 // IndexTransactions creates txlookup indices of the specified block range. The from 257 // is included while to is excluded. 258 // 259 // This function iterates canonical chain in reverse order, it has one main advantage: 260 // We can write tx index tail flag periodically even without the whole indexing 261 // procedure is finished. So that we can resume indexing procedure next time quickly. 262 // 263 // There is a passed channel, the whole procedure will be interrupted if any 264 // signal received. 265 func IndexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, report bool) { 266 indexTransactions(db, from, to, interrupt, nil, report) 267 } 268 269 // indexTransactionsForTesting is the internal debug version with an additional hook. 270 func indexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { 271 indexTransactions(db, from, to, interrupt, hook, false) 272 } 273 274 // unindexTransactions removes txlookup indices of the specified block range. 275 // 276 // There is a passed channel, the whole procedure will be interrupted if any 277 // signal received. 278 func unindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool, report bool) { 279 // short circuit for invalid range 280 if from >= to { 281 return 282 } 283 var ( 284 hashesCh = iterateTransactions(db, from, to, false, interrupt) 285 batch = db.NewBatch() 286 start = time.Now() 287 logged = start.Add(-7 * time.Second) 288 289 // we expect the first number to come in to be [from]. Therefore, setting 290 // nextNum to from means that the queue gap-evaluation will work correctly 291 nextNum = from 292 queue = prque.New[int64, *blockTxHashes](nil) 293 blocks, txs = 0, 0 // for stats reporting 294 ) 295 // Otherwise spin up the concurrent iterator and unindexer 296 for delivery := range hashesCh { 297 // Push the delivery into the queue and process contiguous ranges. 298 queue.Push(delivery, -int64(delivery.number)) 299 for !queue.Empty() { 300 // If the next available item is gapped, return 301 if _, priority := queue.Peek(); -priority != int64(nextNum) { 302 break 303 } 304 // For testing 305 if hook != nil && !hook(nextNum) { 306 break 307 } 308 delivery := queue.PopItem() 309 nextNum = delivery.number + 1 310 DeleteTxLookupEntries(batch, delivery.hashes) 311 txs += len(delivery.hashes) 312 blocks++ 313 314 // If enough data was accumulated in memory or we're at the last block, dump to disk 315 // A batch counts the size of deletion as '1', so we need to flush more 316 // often than that. 317 if blocks%1000 == 0 { 318 WriteTxIndexTail(batch, nextNum) 319 if err := batch.Write(); err != nil { 320 log.Crit("Failed writing batch to db", "error", err) 321 return 322 } 323 batch.Reset() 324 } 325 // If we've spent too much time already, notify the user of what we're doing 326 if time.Since(logged) > 8*time.Second { 327 log.Info("Unindexing transactions", "blocks", blocks, "txs", txs, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start))) 328 logged = time.Now() 329 } 330 } 331 } 332 // Flush the new indexing tail and the last committed data. It can also happen 333 // that the last batch is empty because nothing to unindex, but the tail has to 334 // be flushed anyway. 335 WriteTxIndexTail(batch, nextNum) 336 if err := batch.Write(); err != nil { 337 log.Crit("Failed writing batch to db", "error", err) 338 return 339 } 340 logger := log.Debug 341 if report { 342 logger = log.Info 343 } 344 select { 345 case <-interrupt: 346 logger("Transaction unindexing interrupted", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start))) 347 default: 348 logger("Unindexed transactions", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start))) 349 } 350 } 351 352 // UnindexTransactions removes txlookup indices of the specified block range. 353 // The from is included while to is excluded. 354 // 355 // There is a passed channel, the whole procedure will be interrupted if any 356 // signal received. 357 func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, report bool) { 358 unindexTransactions(db, from, to, interrupt, nil, report) 359 } 360 361 // unindexTransactionsForTesting is the internal debug version with an additional hook. 362 func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { 363 unindexTransactions(db, from, to, interrupt, hook, false) 364 } 365 366 // PruneTransactionIndex removes all tx index entries below a certain block number. 367 func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { 368 tail := ReadTxIndexTail(db) 369 if tail == nil || *tail > pruneBlock { 370 return // no index, or index ends above pruneBlock 371 } 372 // There are blocks below pruneBlock in the index. Iterate the entire index to remove 373 // their entries. Note if this fails, the index is messed up, but tail still points to 374 // the old tail. 375 var count, removed int 376 DeleteAllTxLookupEntries(db, func(txhash common.Hash, v []byte) bool { 377 count++ 378 if count%10000000 == 0 { 379 log.Info("Pruning tx index", "count", count, "removed", removed) 380 } 381 if len(v) > 8 { 382 log.Error("Skipping legacy tx index entry", "hash", txhash) 383 return false 384 } 385 bn := decodeNumber(v) 386 if bn < pruneBlock { 387 removed++ 388 return true 389 } 390 return false 391 }) 392 WriteTxIndexTail(db, pruneBlock) 393 } 394 395 func decodeNumber(b []byte) uint64 { 396 var numBuffer [8]byte 397 copy(numBuffer[8-len(b):], b) 398 return binary.BigEndian.Uint64(numBuffer[:]) 399 }