github.com/TrueBlocks/trueblocks-core/src/apps/chifra@v0.0.0-20241022031540-b362680128f7/internal/scrape/scrape_blaze.go

package scrapePkg

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/base"
	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/config"
	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/logger"
	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/notify"
	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/tslib"
	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/types"
	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/uniq"
	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/utils"
)

// HandleBlaze does the actual scraping, walking through block_cnt blocks and querying traces and logs
// and then extracting addresses and timestamps from those data structures.
func (bm *BlazeManager) HandleBlaze(ctx context.Context, blocks []base.Blknum) (err error) {

	// clear this out
	bm.errors = make([]scrapeError, 0)

	// We need three pipelines: block numbers are shoved into the block channel,
	// which feeds the appearance channel, which in turn feeds the timestamp channel.
	blockChannel := make(chan base.Blknum)
	appearanceChannel := make(chan scrapedData)
	tsChannel := make(chan tslib.TimestampRecord)

	// TODO: The goroutines below may fail. Question -- how does one respond to an error inside a goroutine?

	blockWg := sync.WaitGroup{}
	blockWg.Add(bm.nChannels)
	for i := 0; i < bm.nChannels; i++ {
		go func() {
			_ = bm.ProcessBlocks(blockChannel, &blockWg, appearanceChannel)
		}()
	}

	appWg := sync.WaitGroup{}
	appWg.Add(bm.nChannels)
	for i := 0; i < bm.nChannels; i++ {
		go func() {
			_ = bm.ProcessAppearances(appearanceChannel, &appWg, tsChannel)
		}()
	}

	tsWg := sync.WaitGroup{}
	tsWg.Add(bm.nChannels)
	for i := 0; i < bm.nChannels; i++ {
		go func() {
			_ = bm.ProcessTimestamps(tsChannel, &tsWg)
		}()
	}

	// Now we have three sets of goroutines waiting for data. Send it...
	for _, block := range blocks {
		if ctx.Err() != nil {
			// This means the context got cancelled, i.e. we got a SIGINT.
			return nil
		}

		blockChannel <- block
	}

	// ...and wait until we're done...
	close(blockChannel)
	blockWg.Wait()

	close(appearanceChannel)
	appWg.Wait()

	close(tsChannel)
	tsWg.Wait()

	return nil
}
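
// runStage is a minimal sketch, not part of this package and not wired into
// HandleBlaze, of one way to answer the TODO above about responding to an error
// inside a goroutine: each worker runs through a closure that funnels the first
// non-nil error into a variable guarded by sync.Once, so the error survives the
// WaitGroup and can be inspected after Wait. The name runStage and its signature
// are hypothetical; only the standard library already imported here is used.
func runStage(n int, wg *sync.WaitGroup, work func() error) (firstErr func() error) {
	var once sync.Once
	var saved error
	wg.Add(n)
	for i := 0; i < n; i++ {
		go func() {
			defer wg.Done()
			if err := work(); err != nil {
				once.Do(func() { saved = err })
			}
		}()
	}
	// The returned closure blocks until every worker has finished, then
	// reports the first error (if any) that a worker returned.
	return func() error {
		wg.Wait()
		return saved
	}
}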

// ProcessBlocks processes the block channel and, for each block, queries the node for both
// traces and logs. Send results down appearanceChannel.
func (bm *BlazeManager) ProcessBlocks(blockChannel chan base.Blknum, blockWg *sync.WaitGroup, appearanceChannel chan scrapedData) (err error) {
	defer blockWg.Done()
	for bn := range blockChannel {
		sd := scrapedData{
			bn: bn,
			ts: tslib.TimestampRecord{
				Bn: uint32(bn),
				Ts: uint32(bm.opts.Conn.GetBlockTimestamp(bn)),
			},
		}

		// TODO: BOGUS - we should send in an errorChannel and send the error down that channel and continue here
		var err error
		if sd.traces, err = bm.opts.Conn.GetTracesByBlockNumber(bn); err != nil {
			bm.errors = append(bm.errors, scrapeError{block: bn, err: err})
		} else if sd.receipts, _, err = bm.opts.Conn.GetReceiptsByNumber(bn, base.Timestamp(sd.ts.Ts)); err != nil {
			bm.errors = append(bm.errors, scrapeError{block: bn, err: err})
		} else if sd.withdrawals, sd.miner, err = bm.opts.Conn.GetMinerAndWithdrawals(bn); err != nil {
			bm.errors = append(bm.errors, scrapeError{block: bn, err: err})
		} else {
			appearanceChannel <- sd
		}
	}
	return
}

var blazeMutex sync.Mutex

// ProcessAppearances processes scrapedData objects shoved down the appearanceChannel
func (bm *BlazeManager) ProcessAppearances(appearanceChannel chan scrapedData, appWg *sync.WaitGroup, tsChannel chan tslib.TimestampRecord) (err error) {
	defer appWg.Done()

	for sData := range appearanceChannel {
		addrMap := make(uniq.AddressBooleanMap)
		if err = uniq.UniqFromTraces(bm.chain, sData.traces, addrMap); err != nil {
			bm.errors = append(bm.errors, scrapeError{block: sData.bn, err: err})

		} else if err = uniq.UniqFromReceipts(bm.chain, sData.receipts, addrMap); err != nil {
			bm.errors = append(bm.errors, scrapeError{block: sData.bn, err: err})

		} else if err = uniq.UniqFromWithdrawals(bm.chain, sData.withdrawals, sData.bn, addrMap); err != nil {
			bm.errors = append(bm.errors, scrapeError{block: sData.bn, err: err})

		} else {
			_ = uniq.AddMiner(bm.chain, sData.miner, sData.bn, addrMap)
			if err = bm.WriteAppearances(sData.bn, addrMap); err != nil {
				bm.errors = append(bm.errors, scrapeError{block: sData.bn, err: err})
			}
		}
		tsChannel <- sData.ts
	}

	return
}

// ProcessTimestamps processes timestamp data, accumulating the records in the
// manager's in-memory timestamps map.
func (bm *BlazeManager) ProcessTimestamps(tsChannel chan tslib.TimestampRecord, tsWg *sync.WaitGroup) (err error) {
	defer tsWg.Done()
	for ts := range tsChannel {
		blazeMutex.Lock()
		bm.timestamps[base.Blknum(ts.Bn)] = ts
		bm.nTimestamps++
		blazeMutex.Unlock()
	}
	return
}

var writeMutex sync.Mutex

// TODO: The original intent of creating files was so that we could start over where we left off
// if we failed. But this isn't how it works. We clean up any temp files if we fail, which means
// we write these files and, if we fail, we remove them. If we don't fail, we've written them out,
// but only to re-read them and delete them in this round. We could have easily just kept them
// in an in-memory cache. This would also allow us to not have to stringify the data and instead store
// pointers to structs in memory. We wouldn't have to keep a separate timestamps database nor a
// processedMap (the pointer would serve that purpose).
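
// appearanceCache is a sketch only, not used anywhere in this package: it
// illustrates the in-memory alternative the TODO above describes, holding each
// block's sorted appearance records in a map keyed by block number instead of
// writing them to per-block temp files and re-reading them later. The name
// appearanceCache and its method are hypothetical.
type appearanceCache struct {
	mu    sync.Mutex
	byBlk map[base.Blknum][]string
}

func (c *appearanceCache) add(bn base.Blknum, addrMap uniq.AddressBooleanMap) {
	// Collect and sort the records exactly as WriteAppearances does, but keep
	// the result in memory rather than on disk.
	records := make([]string, 0, len(addrMap))
	for record := range addrMap {
		records = append(records, record)
	}
	sort.Strings(records)

	c.mu.Lock()
	defer c.mu.Unlock()
	if c.byBlk == nil {
		c.byBlk = make(map[base.Blknum][]string)
	}
	c.byBlk[bn] = records
}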

// WriteAppearances writes the appearances for a block to a file
func (bm *BlazeManager) WriteAppearances(bn base.Blknum, addrMap uniq.AddressBooleanMap) (err error) {
	ripePath := filepath.Join(config.PathToIndex(bm.chain), "ripe")
	unripePath := filepath.Join(config.PathToIndex(bm.chain), "unripe")
	appendScrapeError := func(err error) {
		bm.errors = append(bm.errors, scrapeError{block: bn, err: err})
	}
	notificationPayload := make([]notify.NotificationPayloadAppearance, 0, len(addrMap))

	if len(addrMap) > 0 {
		appearanceArray := make([]string, 0, len(addrMap))
		for record := range addrMap {
			appearanceArray = append(appearanceArray, record)
			if bn <= bm.ripeBlock {
				// Only notify about ripe blocks' appearances
				payloadItem := notify.NotificationPayloadAppearance{}
				err := payloadItem.FromString(record)
				if err != nil {
					return fmt.Errorf("implementation error - unexpected record format: %s", err)
				}
				notificationPayload = append(notificationPayload, payloadItem)
			}
		}
		sort.Strings(appearanceArray)

		blockNumStr := utils.PadNum(int(bn), 9)
		fileName := filepath.Join(ripePath, blockNumStr+".txt")
		if bn > bm.ripeBlock {
			fileName = filepath.Join(unripePath, blockNumStr+".txt")
		}

		toWrite := []byte(strings.Join(appearanceArray[:], "\n") + "\n")
		err = os.WriteFile(fileName, toWrite, 0744) // Uses os.O_WRONLY|os.O_CREATE|os.O_TRUNC
		if err != nil {
			appendScrapeError(err)
			return err
		}
	}

	if bm.opts.Notify && bn <= bm.ripeBlock {
		err = Notify(notify.Notification[[]notify.NotificationPayloadAppearance]{
			Msg:     notify.MessageAppearance,
			Meta:    bm.meta,
			Payload: notificationPayload,
		})
		if err != nil {
			// We need this warning, otherwise errors don't show up for 2,000 blocks
			logger.Error("error sending notification", err)
			return err
		}
	}

	bm.syncedReporting(bn, false /* force */)
	writeMutex.Lock()
	bm.processedMap[bn] = true
	if bn > bm.ripeBlock {
		bm.nUnripe++
	} else {
		bm.nRipe++
	}
	writeMutex.Unlock()

	return
}

var (
	locker uint32
)

func (bm *BlazeManager) syncedReporting(bn base.Blknum, force bool) {
	if !atomic.CompareAndSwapUint32(&locker, 0, 1) {
		// Simply skip the update if someone else is already reporting
		return
	}
	// Make sure to clear the lock on exit
	defer atomic.StoreUint32(&locker, 0)

	// Only report once in a while (17 blocks)
	if bm.nProcessed()%17 == 0 || force {
		dist := base.Blknum(0)
		if bm.ripeBlock > bn {
			dist = (bm.ripeBlock - bn)
		}
		if bm.isHeadless && dist < 100 {
			return
		}
		msg := fmt.Sprintf("Scraping %-04d of %-04d at block %d of %d (%d blocks from %s head)",
			bm.nProcessed(),
			bm.BlockCount(),
			bn,
			bm.ripeBlock,
			dist,
			bm.chain,
		)
		logger.Progress(true, msg)
	}
}

// scrapedData combines the extracted block data, trace data, and log data into a
// structure that is passed through to the appearanceChannel for further processing.
type scrapedData struct {
	bn          base.Blknum
	ts          tslib.TimestampRecord
	traces      []types.Trace
	receipts    []types.Receipt
	withdrawals []types.Withdrawal
	miner       base.Address
}
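
// appearanceFileName is purely illustrative and hypothetical (nothing in this
// file calls it): it spells out the file layout WriteAppearances produces,
// where indexPath is what config.PathToIndex(chain) returns above, ripe blocks
// land under "ripe", blocks beyond ripeBlock land under "unripe", and each
// block gets a zero-padded <block>.txt file.
func appearanceFileName(indexPath string, bn, ripeBlock base.Blknum) string {
	folder := "ripe"
	if bn > ripeBlock {
		folder = "unripe"
	}
	return filepath.Join(indexPath, folder, utils.PadNum(int(bn), 9)+".txt")
}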