github.com/TrueBlocks/trueblocks-core/src/apps/chifra@v0.0.0-20241022031540-b362680128f7/internal/scrape/scrape_blaze.go (about)

     1  package scrapePkg
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"sort"
     9  	"strings"
    10  	"sync"
    11  	"sync/atomic"
    12  
    13  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/base"
    14  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/config"
    15  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/logger"
    16  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/notify"
    17  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/tslib"
    18  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/types"
    19  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/uniq"
    20  	"github.com/TrueBlocks/trueblocks-core/src/apps/chifra/pkg/utils"
    21  )
    22  
    23  // HandleBlaze does the actual scraping, walking through block_cnt blocks and querying traces and logs
    24  // and then extracting addresses and timestamps from those data structures.
    25  func (bm *BlazeManager) HandleBlaze(ctx context.Context, blocks []base.Blknum) (err error) {
    26  
    27  	// clear this out
    28  	bm.errors = make([]scrapeError, 0)
    29  
    30  	// We need three pipelines...we shove into blocks, blocks shoves into appearances and timestamps
    31  	blockChannel := make(chan base.Blknum)
    32  	appearanceChannel := make(chan scrapedData)
    33  	tsChannel := make(chan tslib.TimestampRecord)
    34  
    35  	// TODO: The go routines below may fail. Question -- how does one respond to an error inside a go routine?
    36  
    37  	blockWg := sync.WaitGroup{}
    38  	blockWg.Add(bm.nChannels)
    39  	for i := 0; i < bm.nChannels; i++ {
    40  		go func() {
    41  			_ = bm.ProcessBlocks(blockChannel, &blockWg, appearanceChannel)
    42  		}()
    43  	}
    44  
    45  	appWg := sync.WaitGroup{}
    46  	appWg.Add(bm.nChannels)
    47  	for i := 0; i < bm.nChannels; i++ {
    48  		go func() {
    49  			_ = bm.ProcessAppearances(appearanceChannel, &appWg, tsChannel)
    50  		}()
    51  	}
    52  
    53  	tsWg := sync.WaitGroup{}
    54  	tsWg.Add(bm.nChannels)
    55  	for i := 0; i < bm.nChannels; i++ {
    56  		go func() {
    57  			_ = bm.ProcessTimestamps(tsChannel, &tsWg)
    58  		}()
    59  	}
    60  
    61  	// Now we have three go routines waiting for data. Send it...
    62  	for _, block := range blocks {
    63  		if ctx.Err() != nil {
    64  			// This means the context got cancelled, i.e. we got a SIGINT.
    65  			return nil
    66  		}
    67  
    68  		blockChannel <- block
    69  	}
    70  
    71  	// ...and wait until we're done...
    72  	close(blockChannel)
    73  	blockWg.Wait()
    74  
    75  	close(appearanceChannel)
    76  	appWg.Wait()
    77  
    78  	close(tsChannel)
    79  	tsWg.Wait()
    80  
    81  	return nil
    82  }
    83  
    84  // ProcessBlocks processes the block channel and for each block query the node for both
    85  // traces and logs. Send results down appearanceChannel.
    86  func (bm *BlazeManager) ProcessBlocks(blockChannel chan base.Blknum, blockWg *sync.WaitGroup, appearanceChannel chan scrapedData) (err error) {
    87  	defer blockWg.Done()
    88  	for bn := range blockChannel {
    89  		sd := scrapedData{
    90  			bn: bn,
    91  			ts: tslib.TimestampRecord{
    92  				Bn: uint32(bn),
    93  				Ts: uint32(bm.opts.Conn.GetBlockTimestamp(bn)),
    94  			},
    95  		}
    96  
    97  		// TODO: BOGUS - we should send in an errorChannel and send the error down that channel and continue here
    98  		var err error
    99  		if sd.traces, err = bm.opts.Conn.GetTracesByBlockNumber(bn); err != nil {
   100  			bm.errors = append(bm.errors, scrapeError{block: bn, err: err})
   101  		} else if sd.receipts, _, err = bm.opts.Conn.GetReceiptsByNumber(bn, base.Timestamp(sd.ts.Ts)); err != nil {
   102  			bm.errors = append(bm.errors, scrapeError{block: bn, err: err})
   103  		} else if sd.withdrawals, sd.miner, err = bm.opts.Conn.GetMinerAndWithdrawals(bn); err != nil {
   104  			bm.errors = append(bm.errors, scrapeError{block: bn, err: err})
   105  		} else {
   106  			appearanceChannel <- sd
   107  		}
   108  	}
   109  	return
   110  }
   111  
   112  var blazeMutex sync.Mutex
   113  
   114  // ProcessAppearances processes scrapedData objects shoved down the appearanceChannel
   115  func (bm *BlazeManager) ProcessAppearances(appearanceChannel chan scrapedData, appWg *sync.WaitGroup, tsChannel chan tslib.TimestampRecord) (err error) {
   116  	defer appWg.Done()
   117  
   118  	for sData := range appearanceChannel {
   119  		addrMap := make(uniq.AddressBooleanMap)
   120  		if err = uniq.UniqFromTraces(bm.chain, sData.traces, addrMap); err != nil {
   121  			bm.errors = append(bm.errors, scrapeError{block: sData.bn, err: err})
   122  
   123  		} else if err = uniq.UniqFromReceipts(bm.chain, sData.receipts, addrMap); err != nil {
   124  			bm.errors = append(bm.errors, scrapeError{block: sData.bn, err: err})
   125  
   126  		} else if err = uniq.UniqFromWithdrawals(bm.chain, sData.withdrawals, sData.bn, addrMap); err != nil {
   127  			bm.errors = append(bm.errors, scrapeError{block: sData.bn, err: err})
   128  
   129  		} else {
   130  			_ = uniq.AddMiner(bm.chain, sData.miner, sData.bn, addrMap)
   131  			if err = bm.WriteAppearances(sData.bn, addrMap); err != nil {
   132  				bm.errors = append(bm.errors, scrapeError{block: sData.bn, err: err})
   133  			}
   134  		}
   135  		tsChannel <- sData.ts
   136  	}
   137  
   138  	return
   139  }
   140  
   141  // ProcessTimestamps processes timestamp data (currently by printing to a temporary file)
   142  func (bm *BlazeManager) ProcessTimestamps(tsChannel chan tslib.TimestampRecord, tsWg *sync.WaitGroup) (err error) {
   143  	defer tsWg.Done()
   144  	for ts := range tsChannel {
   145  		blazeMutex.Lock()
   146  		bm.timestamps[base.Blknum(ts.Bn)] = ts
   147  		bm.nTimestamps++
   148  		blazeMutex.Unlock()
   149  	}
   150  	return
   151  }
   152  
   153  var writeMutex sync.Mutex
   154  
   155  // TODO: The original intent of creating files was so that we could start over where we left off
   156  // if we failed. But this isn't how it works. We cleanup any temp files if we fail, which means
   157  // we write these files and if we fail, we remove them. If we don't fail, we've written them out,
   158  // but only to re-read them and delete them in this round. Would could have easily just kept them
   159  // in an in-memory cache. This would also allow us to not have to stringify the data and just store
   160  // pointers to structs in memory. We wouldn't have to keep a seperate timestamps database nor a
   161  // processedMap (the pointer would serve that purpose).
   162  
   163  // WriteAppearances writes the appearance for a chunk to a file
   164  func (bm *BlazeManager) WriteAppearances(bn base.Blknum, addrMap uniq.AddressBooleanMap) (err error) {
   165  	ripePath := filepath.Join(config.PathToIndex(bm.chain), "ripe")
   166  	unripePath := filepath.Join(config.PathToIndex(bm.chain), "unripe")
   167  	appendScrapeError := func(err error) {
   168  		bm.errors = append(bm.errors, scrapeError{block: bn, err: err})
   169  	}
   170  	notificationPayload := make([]notify.NotificationPayloadAppearance, 0, len(addrMap))
   171  
   172  	if len(addrMap) > 0 {
   173  		appearanceArray := make([]string, 0, len(addrMap))
   174  		for record := range addrMap {
   175  			appearanceArray = append(appearanceArray, record)
   176  			if bn <= bm.ripeBlock {
   177  				// Only notify about ripe block's appearances
   178  				payloadItem := notify.NotificationPayloadAppearance{}
   179  				err := payloadItem.FromString(record)
   180  				if err != nil {
   181  					return fmt.Errorf("implementation error - unexpected record format: %s", err)
   182  				}
   183  				notificationPayload = append(notificationPayload, payloadItem)
   184  			}
   185  		}
   186  		sort.Strings(appearanceArray)
   187  
   188  		blockNumStr := utils.PadNum(int(bn), 9)
   189  		fileName := filepath.Join(ripePath, blockNumStr+".txt")
   190  		if bn > bm.ripeBlock {
   191  			fileName = filepath.Join(unripePath, blockNumStr+".txt")
   192  		}
   193  
   194  		toWrite := []byte(strings.Join(appearanceArray[:], "\n") + "\n")
   195  		err = os.WriteFile(fileName, toWrite, 0744) // Uses os.O_WRONLY|os.O_CREATE|os.O_TRUNC
   196  		if err != nil {
   197  			appendScrapeError(err)
   198  			return err
   199  		}
   200  	}
   201  
   202  	if bm.opts.Notify && bn <= bm.ripeBlock {
   203  		err = Notify(notify.Notification[[]notify.NotificationPayloadAppearance]{
   204  			Msg:     notify.MessageAppearance,
   205  			Meta:    bm.meta,
   206  			Payload: notificationPayload,
   207  		})
   208  		if err != nil {
   209  			// We need this warning, otherwise errors don't show up for 2,000 blocks
   210  			logger.Error("error sending notification", err)
   211  			return err
   212  		}
   213  	}
   214  
   215  	bm.syncedReporting(bn, false /* force */)
   216  	writeMutex.Lock()
   217  	bm.processedMap[bn] = true
   218  	if bn > bm.ripeBlock {
   219  		bm.nUnripe++
   220  	} else {
   221  		bm.nRipe++
   222  	}
   223  	writeMutex.Unlock()
   224  
   225  	return
   226  }
   227  
   228  var (
   229  	locker uint32
   230  )
   231  
   232  func (bm *BlazeManager) syncedReporting(bn base.Blknum, force bool) {
   233  	if !atomic.CompareAndSwapUint32(&locker, 0, 1) {
   234  		// Simply skip the update if someone else is already reporting
   235  		return
   236  	}
   237  	// Make sure to clear the lock on exit
   238  	defer atomic.StoreUint32(&locker, 0)
   239  
   240  	// Only report once in a while (17 blocks)
   241  	if bm.nProcessed()%17 == 0 || force {
   242  		dist := base.Blknum(0)
   243  		if bm.ripeBlock > bn {
   244  			dist = (bm.ripeBlock - bn)
   245  		}
   246  		if bm.isHeadless && dist < 100 {
   247  			return
   248  		}
   249  		msg := fmt.Sprintf("Scraping %-04d of %-04d at block %d of %d (%d blocks from %s head)",
   250  			bm.nProcessed(),
   251  			bm.BlockCount(),
   252  			bn,
   253  			bm.ripeBlock,
   254  			dist,
   255  			bm.chain,
   256  		)
   257  		logger.Progress(true, msg)
   258  	}
   259  }
   260  
   261  // scrapedData combines the extracted block data, trace data, and log data into a
   262  // structure that is passed through to the AddressChannel for further processing.
   263  type scrapedData struct {
   264  	bn          base.Blknum
   265  	ts          tslib.TimestampRecord
   266  	traces      []types.Trace
   267  	receipts    []types.Receipt
   268  	withdrawals []types.Withdrawal
   269  	miner       base.Address
   270  }