github.com/baptiste-b-pegasys/quorum/v22@v22.4.2/core/rawdb/chain_iterator.go (about) 1 // Copyright 2019 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package rawdb 18 19 import ( 20 "runtime" 21 "sync/atomic" 22 "time" 23 24 "github.com/ethereum/go-ethereum/common" 25 "github.com/ethereum/go-ethereum/common/prque" 26 "github.com/ethereum/go-ethereum/ethdb" 27 "github.com/ethereum/go-ethereum/log" 28 "github.com/ethereum/go-ethereum/rlp" 29 "golang.org/x/crypto/sha3" 30 ) 31 32 // InitDatabaseFromFreezer reinitializes an empty database from a previous batch 33 // of frozen ancient blocks. The method iterates over all the frozen blocks and 34 // injects into the database the block hash->number mappings. 35 func InitDatabaseFromFreezer(db ethdb.Database) { 36 // If we can't access the freezer or it's empty, abort 37 frozen, err := db.Ancients() 38 if err != nil || frozen == 0 { 39 return 40 } 41 var ( 42 batch = db.NewBatch() 43 start = time.Now() 44 logged = start.Add(-7 * time.Second) // Unindex during import is fast, don't double log 45 hash common.Hash 46 ) 47 for i := uint64(0); i < frozen; i++ { 48 // Since the freezer has all data in sequential order on a file, 49 // it would be 'neat' to read more data in one go, and let the 50 // freezerdb return N items (e.g up to 1000 items per go) 51 // That would require an API change in Ancients though 52 if h, err := db.Ancient(freezerHashTable, i); err != nil { 53 log.Crit("Failed to init database from freezer", "err", err) 54 } else { 55 hash = common.BytesToHash(h) 56 } 57 WriteHeaderNumber(batch, hash, i) 58 // If enough data was accumulated in memory or we're at the last block, dump to disk 59 if batch.ValueSize() > ethdb.IdealBatchSize { 60 if err := batch.Write(); err != nil { 61 log.Crit("Failed to write data to db", "err", err) 62 } 63 batch.Reset() 64 } 65 // If we've spent too much time already, notify the user of what we're doing 66 if time.Since(logged) > 8*time.Second { 67 log.Info("Initializing database from freezer", "total", frozen, "number", i, "hash", hash, "elapsed", common.PrettyDuration(time.Since(start))) 68 logged = time.Now() 69 } 70 } 71 if err := batch.Write(); err != nil { 72 log.Crit("Failed to write data to db", "err", err) 73 } 74 batch.Reset() 75 76 WriteHeadHeaderHash(db, hash) 77 WriteHeadFastBlockHash(db, hash) 78 log.Info("Initialized database from freezer", "blocks", frozen, "elapsed", common.PrettyDuration(time.Since(start))) 79 } 80 81 type blockTxHashes struct { 82 number uint64 83 hashes []common.Hash 84 } 85 86 // iterateTransactions iterates over all transactions in the (canon) block 87 // number(s) given, and yields the hashes on a channel. If there is a signal 88 // received from interrupt channel, the iteration will be aborted and result 89 // channel will be closed. 90 func iterateTransactions(db ethdb.Database, from uint64, to uint64, reverse bool, interrupt chan struct{}) chan *blockTxHashes { 91 // One thread sequentially reads data from db 92 type numberRlp struct { 93 number uint64 94 rlp rlp.RawValue 95 } 96 if to == from { 97 return nil 98 } 99 threads := to - from 100 if cpus := runtime.NumCPU(); threads > uint64(cpus) { 101 threads = uint64(cpus) 102 } 103 var ( 104 rlpCh = make(chan *numberRlp, threads*2) // we send raw rlp over this channel 105 hashesCh = make(chan *blockTxHashes, threads*2) // send hashes over hashesCh 106 ) 107 // lookup runs in one instance 108 lookup := func() { 109 n, end := from, to 110 if reverse { 111 n, end = to-1, from-1 112 } 113 defer close(rlpCh) 114 for n != end { 115 data := ReadCanonicalBodyRLP(db, n) 116 // Feed the block to the aggregator, or abort on interrupt 117 select { 118 case rlpCh <- &numberRlp{n, data}: 119 case <-interrupt: 120 return 121 } 122 if reverse { 123 n-- 124 } else { 125 n++ 126 } 127 } 128 } 129 // process runs in parallel 130 nThreadsAlive := int32(threads) 131 process := func() { 132 defer func() { 133 // Last processor closes the result channel 134 if atomic.AddInt32(&nThreadsAlive, -1) == 0 { 135 close(hashesCh) 136 } 137 }() 138 139 var hasher = sha3.NewLegacyKeccak256() 140 for data := range rlpCh { 141 it, err := rlp.NewListIterator(data.rlp) 142 if err != nil { 143 log.Warn("tx iteration error", "error", err) 144 return 145 } 146 it.Next() 147 txs := it.Value() 148 txIt, err := rlp.NewListIterator(txs) 149 if err != nil { 150 log.Warn("tx iteration error", "error", err) 151 return 152 } 153 var hashes []common.Hash 154 for txIt.Next() { 155 if err := txIt.Err(); err != nil { 156 log.Warn("tx iteration error", "error", err) 157 return 158 } 159 var txHash common.Hash 160 hasher.Reset() 161 hasher.Write(txIt.Value()) 162 hasher.Sum(txHash[:0]) 163 hashes = append(hashes, txHash) 164 } 165 result := &blockTxHashes{ 166 hashes: hashes, 167 number: data.number, 168 } 169 // Feed the block to the aggregator, or abort on interrupt 170 select { 171 case hashesCh <- result: 172 case <-interrupt: 173 return 174 } 175 } 176 } 177 go lookup() // start the sequential db accessor 178 for i := 0; i < int(threads); i++ { 179 go process() 180 } 181 return hashesCh 182 } 183 184 // indexTransactions creates txlookup indices of the specified block range. 185 // 186 // This function iterates canonical chain in reverse order, it has one main advantage: 187 // We can write tx index tail flag periodically even without the whole indexing 188 // procedure is finished. So that we can resume indexing procedure next time quickly. 189 // 190 // There is a passed channel, the whole procedure will be interrupted if any 191 // signal received. 192 func indexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { 193 // short circuit for invalid range 194 if from >= to { 195 return 196 } 197 var ( 198 hashesCh = iterateTransactions(db, from, to, true, interrupt) 199 batch = db.NewBatch() 200 start = time.Now() 201 logged = start.Add(-7 * time.Second) 202 // Since we iterate in reverse, we expect the first number to come 203 // in to be [to-1]. Therefore, setting lastNum to means that the 204 // prqueue gap-evaluation will work correctly 205 lastNum = to 206 queue = prque.New(nil) 207 // for stats reporting 208 blocks, txs = 0, 0 209 ) 210 for chanDelivery := range hashesCh { 211 // Push the delivery into the queue and process contiguous ranges. 212 // Since we iterate in reverse, so lower numbers have lower prio, and 213 // we can use the number directly as prio marker 214 queue.Push(chanDelivery, int64(chanDelivery.number)) 215 for !queue.Empty() { 216 // If the next available item is gapped, return 217 if _, priority := queue.Peek(); priority != int64(lastNum-1) { 218 break 219 } 220 // For testing 221 if hook != nil && !hook(lastNum-1) { 222 break 223 } 224 // Next block available, pop it off and index it 225 delivery := queue.PopItem().(*blockTxHashes) 226 lastNum = delivery.number 227 WriteTxLookupEntries(batch, delivery.number, delivery.hashes) 228 blocks++ 229 txs += len(delivery.hashes) 230 // If enough data was accumulated in memory or we're at the last block, dump to disk 231 if batch.ValueSize() > ethdb.IdealBatchSize { 232 WriteTxIndexTail(batch, lastNum) // Also write the tail here 233 if err := batch.Write(); err != nil { 234 log.Crit("Failed writing batch to db", "error", err) 235 return 236 } 237 batch.Reset() 238 } 239 // If we've spent too much time already, notify the user of what we're doing 240 if time.Since(logged) > 8*time.Second { 241 log.Info("Indexing transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start))) 242 logged = time.Now() 243 } 244 } 245 } 246 // Flush the new indexing tail and the last committed data. It can also happen 247 // that the last batch is empty because nothing to index, but the tail has to 248 // be flushed anyway. 249 WriteTxIndexTail(batch, lastNum) 250 if err := batch.Write(); err != nil { 251 log.Crit("Failed writing batch to db", "error", err) 252 return 253 } 254 select { 255 case <-interrupt: 256 log.Debug("Transaction indexing interrupted", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start))) 257 default: 258 log.Info("Indexed transactions", "blocks", blocks, "txs", txs, "tail", lastNum, "elapsed", common.PrettyDuration(time.Since(start))) 259 } 260 } 261 262 // IndexTransactions creates txlookup indices of the specified block range. 263 // 264 // This function iterates canonical chain in reverse order, it has one main advantage: 265 // We can write tx index tail flag periodically even without the whole indexing 266 // procedure is finished. So that we can resume indexing procedure next time quickly. 267 // 268 // There is a passed channel, the whole procedure will be interrupted if any 269 // signal received. 270 func IndexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}) { 271 indexTransactions(db, from, to, interrupt, nil) 272 } 273 274 // indexTransactionsForTesting is the internal debug version with an additional hook. 275 func indexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { 276 indexTransactions(db, from, to, interrupt, hook) 277 } 278 279 // unindexTransactions removes txlookup indices of the specified block range. 280 // 281 // There is a passed channel, the whole procedure will be interrupted if any 282 // signal received. 283 func unindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { 284 // short circuit for invalid range 285 if from >= to { 286 return 287 } 288 var ( 289 hashesCh = iterateTransactions(db, from, to, false, interrupt) 290 batch = db.NewBatch() 291 start = time.Now() 292 logged = start.Add(-7 * time.Second) 293 // we expect the first number to come in to be [from]. Therefore, setting 294 // nextNum to from means that the prqueue gap-evaluation will work correctly 295 nextNum = from 296 queue = prque.New(nil) 297 // for stats reporting 298 blocks, txs = 0, 0 299 ) 300 // Otherwise spin up the concurrent iterator and unindexer 301 for delivery := range hashesCh { 302 // Push the delivery into the queue and process contiguous ranges. 303 queue.Push(delivery, -int64(delivery.number)) 304 for !queue.Empty() { 305 // If the next available item is gapped, return 306 if _, priority := queue.Peek(); -priority != int64(nextNum) { 307 break 308 } 309 // For testing 310 if hook != nil && !hook(nextNum) { 311 break 312 } 313 delivery := queue.PopItem().(*blockTxHashes) 314 nextNum = delivery.number + 1 315 DeleteTxLookupEntries(batch, delivery.hashes) 316 txs += len(delivery.hashes) 317 blocks++ 318 319 // If enough data was accumulated in memory or we're at the last block, dump to disk 320 // A batch counts the size of deletion as '1', so we need to flush more 321 // often than that. 322 if blocks%1000 == 0 { 323 WriteTxIndexTail(batch, nextNum) 324 if err := batch.Write(); err != nil { 325 log.Crit("Failed writing batch to db", "error", err) 326 return 327 } 328 batch.Reset() 329 } 330 // If we've spent too much time already, notify the user of what we're doing 331 if time.Since(logged) > 8*time.Second { 332 log.Info("Unindexing transactions", "blocks", blocks, "txs", txs, "total", to-from, "elapsed", common.PrettyDuration(time.Since(start))) 333 logged = time.Now() 334 } 335 } 336 } 337 // Flush the new indexing tail and the last committed data. It can also happen 338 // that the last batch is empty because nothing to unindex, but the tail has to 339 // be flushed anyway. 340 WriteTxIndexTail(batch, nextNum) 341 if err := batch.Write(); err != nil { 342 log.Crit("Failed writing batch to db", "error", err) 343 return 344 } 345 select { 346 case <-interrupt: 347 log.Debug("Transaction unindexing interrupted", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start))) 348 default: 349 log.Info("Unindexed transactions", "blocks", blocks, "txs", txs, "tail", to, "elapsed", common.PrettyDuration(time.Since(start))) 350 } 351 } 352 353 // UnindexTransactions removes txlookup indices of the specified block range. 354 // 355 // There is a passed channel, the whole procedure will be interrupted if any 356 // signal received. 357 func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}) { 358 unindexTransactions(db, from, to, interrupt, nil) 359 } 360 361 // unindexTransactionsForTesting is the internal debug version with an additional hook. 362 func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { 363 unindexTransactions(db, from, to, interrupt, hook) 364 }