github.com/jimmyx0x/go-ethereum@v1.10.28/core/rawdb/chain_iterator.go (about) 1 // Copyright 2020 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package rawdb 18 19 import ( 20 "runtime" 21 "sync/atomic" 22 "time" 23 24 "github.com/ethereum/go-ethereum/common" 25 "github.com/ethereum/go-ethereum/common/prque" 26 "github.com/ethereum/go-ethereum/core/types" 27 "github.com/ethereum/go-ethereum/ethdb" 28 "github.com/ethereum/go-ethereum/log" 29 "github.com/ethereum/go-ethereum/rlp" 30 ) 31 32 // InitDatabaseFromFreezer reinitializes an empty database from a previous batch 33 // of frozen ancient blocks. The method iterates over all the frozen blocks and 34 // injects into the database the block hash->number mappings. 35 func InitDatabaseFromFreezer(db ethdb.Database) { 36 // If we can't access the freezer or it's empty, abort 37 frozen, err := db.Ancients() 38 if err != nil || frozen == 0 { 39 return 40 } 41 var ( 42 batch = db.NewBatch() 43 start = time.Now() 44 logged = start.Add(-7 * time.Second) // Unindex during import is fast, don't double log 45 hash common.Hash 46 ) 47 for i := uint64(0); i < frozen; { 48 // We read 100K hashes at a time, for a total of 3.2M 49 count := uint64(100_000) 50 if i+count > frozen { 51 count = frozen - i 52 } 53 data, err := db.AncientRange(chainFreezerHashTable, i, count, 32*count) 54 if err != nil { 55 log.Crit("Failed to init database from freezer", "err", err) 56 } 57 for j, h := range data { 58 number := i + uint64(j) 59 hash = common.BytesToHash(h) 60 WriteHeaderNumber(batch, hash, number) 61 // If enough data was accumulated in memory or we're at the last block, dump to disk 62 if batch.ValueSize() > ethdb.IdealBatchSize { 63 if err := batch.Write(); err != nil { 64 log.Crit("Failed to write data to db", "err", err) 65 } 66 batch.Reset() 67 } 68 } 69 i += uint64(len(data)) 70 // If we've spent too much time already, notify the user of what we're doing 71 if time.Since(logged) > 8*time.Second { 72 log.Info("Initializing database from freezer", "total", frozen, "number", i, "hash", hash, "elapsed", common.PrettyDuration(time.Since(start))) 73 logged = time.Now() 74 } 75 } 76 if err := batch.Write(); err != nil { 77 log.Crit("Failed to write data to db", "err", err) 78 } 79 batch.Reset() 80 81 WriteHeadHeaderHash(db, hash) 82 WriteHeadFastBlockHash(db, hash) 83 log.Info("Initialized database from freezer", "blocks", frozen, "elapsed", common.PrettyDuration(time.Since(start))) 84 } 85 86 type blockTxHashes struct { 87 number uint64 88 hashes []common.Hash 89 } 90 91 // iterateTransactions iterates over all transactions in the (canon) block 92 // number(s) given, and yields the hashes on a channel. If there is a signal 93 // received from interrupt channel, the iteration will be aborted and result 94 // channel will be closed. 95 func iterateTransactions(db ethdb.Database, from uint64, to uint64, reverse bool, interrupt chan struct{}) chan *blockTxHashes { 96 // One thread sequentially reads data from db 97 type numberRlp struct { 98 number uint64 99 rlp rlp.RawValue 100 } 101 if to == from { 102 return nil 103 } 104 threads := to - from 105 if cpus := runtime.NumCPU(); threads > uint64(cpus) { 106 threads = uint64(cpus) 107 } 108 var ( 109 rlpCh = make(chan *numberRlp, threads*2) // we send raw rlp over this channel 110 hashesCh = make(chan *blockTxHashes, threads*2) // send hashes over hashesCh 111 ) 112 // lookup runs in one instance 113 lookup := func() { 114 n, end := from, to 115 if reverse { 116 n, end = to-1, from-1 117 } 118 defer close(rlpCh) 119 for n != end { 120 data := ReadCanonicalBodyRLP(db, n) 121 // Feed the block to the aggregator, or abort on interrupt 122 select { 123 case rlpCh <- &numberRlp{n, data}: 124 case <-interrupt: 125 return 126 } 127 if reverse { 128 n-- 129 } else { 130 n++ 131 } 132 } 133 } 134 // process runs in parallel 135 nThreadsAlive := int32(threads) 136 process := func() { 137 defer func() { 138 // Last processor closes the result channel 139 if atomic.AddInt32(&nThreadsAlive, -1) == 0 { 140 close(hashesCh) 141 } 142 }() 143 for data := range rlpCh { 144 var body types.Body 145 if err := rlp.DecodeBytes(data.rlp, &body); err != nil { 146 log.Warn("Failed to decode block body", "block", data.number, "error", err) 147 return 148 } 149 var hashes []common.Hash 150 for _, tx := range body.Transactions { 151 hashes = append(hashes, tx.Hash()) 152 } 153 result := &blockTxHashes{ 154 hashes: hashes, 155 number: data.number, 156 } 157 // Feed the block to the aggregator, or abort on interrupt 158 select { 159 case hashesCh <- result: 160 case <-interrupt: 161 return 162 } 163 } 164 } 165 go lookup() // start the sequential db accessor 166 for i := 0; i < int(threads); i++ { 167 go process() 168 } 169 return hashesCh 170 } 171 172 // indexTransactions creates txlookup indices of the specified block range. 173 // 174 // This function iterates canonical chain in reverse order, it has one main advantage: 175 // We can write tx index tail flag periodically even without the whole indexing 176 // procedure is finished. So that we can resume indexing procedure next time quickly. 177 // 178 // There is a passed channel, the whole procedure will be interrupted if any 179 // signal received. 180 func indexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { 181 // short circuit for invalid range 182 if from >= to { 183 return 184 } 185 var ( 186 hashesCh = iterateTransactions(db, from, to, true, interrupt) 187 batch = db.NewBatch() 188 start = time.Now() 189 logged = start.Add(-7 * time.Second) 190 // Since we iterate in reverse, we expect the first number to come 191 // in to be [to-1]. Therefore, setting lastNum to means that the 192 // prqueue gap-evaluation will work correctly 193 lastNum = to 194 queue = prque.New(nil) 195 // for stats reporting 196 blocks, txs = 0, 0 197 ) 198 for chanDelivery := range hashesCh { 199 // Push the delivery into the queue and process contiguous ranges. 200 // Since we iterate in reverse, so lower numbers have lower prio, and 201 // we can use the number directly as prio marker 202 queue.Push(chanDelivery, int64(chanDelivery.number)) 203 for !queue.Empty() { 204 // If the next available item is gapped, return 205 if _, priority := queue.Peek(); priority != int64(lastNum-1) { 206 break 207 } 208 // For testing 209 if hook != nil && !hook(lastNum-1) { 210 break 211 } 212 // Next block available, pop it off and index it 213 delivery := queue.PopItem().(*blockTxHashes) 214 lastNum = delivery.number 215 WriteTxLookupEntries(batch, delivery.number, delivery.hashes) 216 blocks++ 217 txs += len(delivery.hashes) 218 // If enough data was accumulated in memory or we're at the last block, dump to disk 219 if batch.ValueSize() > ethdb.IdealBatchSize { 220 WriteTxIndexTail(batch, lastNum) // Also write the tail here 221 if err := batch.Write(); err != nil { 222 log.Crit("Failed writing batch to db", "error", err) 223 return 224 } 225 batch.Reset() 226 } 227 // If we've spent too much time already, notify the user of what we're doing 228 if time.Since(logged) > 8*time.Second { 229 log.Info("Indexing transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start))) 230 logged = time.Now() 231 } 232 } 233 } 234 // Flush the new indexing tail and the last committed data. It can also happen 235 // that the last batch is empty because nothing to index, but the tail has to 236 // be flushed anyway. 237 WriteTxIndexTail(batch, lastNum) 238 if err := batch.Write(); err != nil { 239 log.Crit("Failed writing batch to db", "error", err) 240 return 241 } 242 select { 243 case <-interrupt: 244 log.Debug("Transaction indexing interrupted", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start))) 245 default: 246 log.Debug("Indexed transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start))) 247 } 248 } 249 250 // IndexTransactions creates txlookup indices of the specified block range. The from 251 // is included while to is excluded. 252 // 253 // This function iterates canonical chain in reverse order, it has one main advantage: 254 // We can write tx index tail flag periodically even without the whole indexing 255 // procedure is finished. So that we can resume indexing procedure next time quickly. 256 // 257 // There is a passed channel, the whole procedure will be interrupted if any 258 // signal received. 259 func IndexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}) { 260 indexTransactions(db, from, to, interrupt, nil) 261 } 262 263 // indexTransactionsForTesting is the internal debug version with an additional hook. 264 func indexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { 265 indexTransactions(db, from, to, interrupt, hook) 266 } 267 268 // unindexTransactions removes txlookup indices of the specified block range. 269 // 270 // There is a passed channel, the whole procedure will be interrupted if any 271 // signal received. 272 func unindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { 273 // short circuit for invalid range 274 if from >= to { 275 return 276 } 277 var ( 278 hashesCh = iterateTransactions(db, from, to, false, interrupt) 279 batch = db.NewBatch() 280 start = time.Now() 281 logged = start.Add(-7 * time.Second) 282 // we expect the first number to come in to be [from]. Therefore, setting 283 // nextNum to from means that the prqueue gap-evaluation will work correctly 284 nextNum = from 285 queue = prque.New(nil) 286 // for stats reporting 287 blocks, txs = 0, 0 288 ) 289 // Otherwise spin up the concurrent iterator and unindexer 290 for delivery := range hashesCh { 291 // Push the delivery into the queue and process contiguous ranges. 292 queue.Push(delivery, -int64(delivery.number)) 293 for !queue.Empty() { 294 // If the next available item is gapped, return 295 if _, priority := queue.Peek(); -priority != int64(nextNum) { 296 break 297 } 298 // For testing 299 if hook != nil && !hook(nextNum) { 300 break 301 } 302 delivery := queue.PopItem().(*blockTxHashes) 303 nextNum = delivery.number + 1 304 DeleteTxLookupEntries(batch, delivery.hashes) 305 txs += len(delivery.hashes) 306 blocks++ 307 308 // If enough data was accumulated in memory or we're at the last block, dump to disk 309 // A batch counts the size of deletion as '1', so we need to flush more 310 // often than that. 311 if blocks%1000 == 0 { 312 WriteTxIndexTail(batch, nextNum) 313 if err := batch.Write(); err != nil { 314 log.Crit("Failed writing batch to db", "error", err) 315 return 316 } 317 batch.Reset() 318 } 319 // If we've spent too much time already, notify the user of what we're doing 320 if time.Since(logged) > 8*time.Second { 321 log.Info("Unindexing transactions", "blocks", blocks, "txs", txs, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start))) 322 logged = time.Now() 323 } 324 } 325 } 326 // Flush the new indexing tail and the last committed data. It can also happen 327 // that the last batch is empty because nothing to unindex, but the tail has to 328 // be flushed anyway. 329 WriteTxIndexTail(batch, nextNum) 330 if err := batch.Write(); err != nil { 331 log.Crit("Failed writing batch to db", "error", err) 332 return 333 } 334 select { 335 case <-interrupt: 336 log.Debug("Transaction unindexing interrupted", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start))) 337 default: 338 log.Debug("Unindexed transactions", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start))) 339 } 340 } 341 342 // UnindexTransactions removes txlookup indices of the specified block range. 343 // The from is included while to is excluded. 344 // 345 // There is a passed channel, the whole procedure will be interrupted if any 346 // signal received. 347 func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}) { 348 unindexTransactions(db, from, to, interrupt, nil) 349 } 350 351 // unindexTransactionsForTesting is the internal debug version with an additional hook. 352 func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { 353 unindexTransactions(db, from, to, interrupt, hook) 354 }