github.com/0xsequence/ethkit@v1.25.0/ethmonitor/ethmonitor.go (about)

     1  package ethmonitor
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"errors"
     7  	"fmt"
     8  	"math/big"
     9  	"sync"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/0xsequence/ethkit/ethrpc"
    14  	"github.com/0xsequence/ethkit/go-ethereum"
    15  	"github.com/0xsequence/ethkit/go-ethereum/common"
    16  	"github.com/0xsequence/ethkit/go-ethereum/core/types"
    17  	"github.com/0xsequence/ethkit/util"
    18  	"github.com/cespare/xxhash/v2"
    19  	"github.com/goware/breaker"
    20  	"github.com/goware/cachestore"
    21  	"github.com/goware/cachestore/cachestorectl"
    22  	"github.com/goware/calc"
    23  	"github.com/goware/channel"
    24  	"github.com/goware/logger"
    25  	"github.com/goware/superr"
    26  )
    27  
// DefaultOptions are the settings used by NewMonitor when the caller does not
// supply an Options value. A nil StartBlockNumber and a zero
// TrailNumBlocksBehindHead mean the monitor follows the latest block.
var DefaultOptions = Options{
	Logger:                           logger.NewLogger(logger.LogLevel_WARN),
	PollingInterval:                  1500 * time.Millisecond,
	ExpectedBlockInterval:            15 * time.Second,
	StreamingErrorResetInterval:      15 * time.Minute,
	StreamingRetryAfter:              20 * time.Minute,
	StreamingErrNumToSwitchToPolling: 3,
	UnsubscribeOnStop:                false,
	Timeout:                          20 * time.Second,
	StartBlockNumber:                 nil, // latest
	TrailNumBlocksBehindHead:         0,   // latest
	BlockRetentionLimit:              200,
	WithLogs:                         false,
	LogTopics:                        []common.Hash{}, // all logs
	DebugLogging:                     false,
	CacheExpiry:                      300 * time.Second,
	Alerter:                          util.NoopAlerter(),
}
    46  
// Options configures a Monitor. See DefaultOptions for the defaults applied
// by NewMonitor when no Options value is passed.
type Options struct {
	// Logger used by ethmonitor to log warnings and debug info
	Logger logger.Logger

	// PollingInterval to query the chain for new blocks
	PollingInterval time.Duration

	// ExpectedBlockInterval is the expected time between blocks
	ExpectedBlockInterval time.Duration

	// StreamingErrorResetInterval is the time to reset the streaming error count
	StreamingErrorResetInterval time.Duration

	// StreamingRetryAfter is the time to wait before retrying the streaming again
	StreamingRetryAfter time.Duration

	// StreamingErrNumToSwitchToPolling is the number of errors before switching to polling
	StreamingErrNumToSwitchToPolling int

	// Auto-unsubscribe on monitor stop or error
	UnsubscribeOnStop bool

	// Timeout duration used by the rpc client when fetching data from the remote node.
	Timeout time.Duration

	// StartBlockNumber to begin the monitor from. Negative values mean "that
	// many blocks behind the latest block"; nil means start from latest.
	StartBlockNumber *big.Int

	// Bootstrap flag which indicates the monitor will expect the monitor's
	// events to be bootstrapped, and will continue from that point. This also
	// takes precedence over StartBlockNumber when set to true.
	Bootstrap bool

	// TrailNumBlocksBehindHead is the number of blocks we trail behind
	// the head of the chain before broadcasting new events to the subscribers.
	TrailNumBlocksBehindHead int

	// BlockRetentionLimit is the number of blocks we keep on the canonical chain
	// cache. NewMonitor adds TrailNumBlocksBehindHead to this value.
	BlockRetentionLimit int

	// Retain block and logs payloads (the raw bytes fetched from the node)
	RetainPayloads bool

	// WithLogs will include logs with the blocks if specified true.
	WithLogs bool

	// LogTopics will filter only specific log topics to include.
	LogTopics []common.Hash

	// CacheBackend to use for caching block data
	// NOTE: do not use this unless you know what you're doing.
	// In most cases leave this nil.
	CacheBackend cachestore.Backend

	// CacheExpiry is how long to keep each record in cache
	CacheExpiry time.Duration

	// Alerter config via github.com/goware/alerter
	Alerter util.Alerter

	// DebugLogging toggle
	DebugLogging bool
}
   111  
// Sentinel errors reported by the monitor. Test with errors.Is, as some are
// wrapped with additional context (via superr) before being returned.
var (
	ErrFatal                 = errors.New("ethmonitor: fatal error, stopping")
	ErrReorg                 = errors.New("ethmonitor: block reorg")
	ErrUnexpectedParentHash  = errors.New("ethmonitor: unexpected parent hash")
	ErrUnexpectedBlockNumber = errors.New("ethmonitor: unexpected block number")
	ErrQueueFull             = errors.New("ethmonitor: publish queue is full")
	ErrMaxAttempts           = errors.New("ethmonitor: max attempts hit")
	ErrMonitorStopped        = errors.New("ethmonitor: stopped")
)
   121  
// Monitor tracks the canonical chain of a network by polling or streaming new
// heads from the provider, and broadcasts block events (additions and reorg
// removals) to its subscribers.
type Monitor struct {
	options Options

	log      logger.Logger
	alert    util.Alerter
	provider ethrpc.RawInterface

	// chain is the in-memory canonical chain cache. chainID is fetched lazily
	// when Run starts. nextBlockNumber is the next block to fetch (nil means
	// "latest") and is guarded by nextBlockNumberMu. pollInterval holds the
	// current poll delay as a time.Duration stored atomically in nanoseconds.
	chain             *Chain
	chainID           *big.Int
	nextBlockNumber   *big.Int
	nextBlockNumberMu sync.Mutex
	pollInterval      atomic.Int64

	// cache is an optional (possibly distributed) cache for raw block and
	// log payloads; nil unless Options.CacheBackend is set.
	cache cachestore.Store[[]byte]

	// publishCh feeds the broadcast goroutine started in Run; publishQueue
	// is sized at 2x BlockRetentionLimit (presumably to buffer events while
	// trailing behind head — confirm in publish, not visible here).
	publishCh    chan Blocks
	publishQueue *queue
	subscribers  []*subscriber

	ctx     context.Context
	ctxStop context.CancelFunc
	running int32 // 1 while Run is active; accessed atomically
	mu      sync.RWMutex
}
   146  
   147  func NewMonitor(provider ethrpc.RawInterface, options ...Options) (*Monitor, error) {
   148  	opts := DefaultOptions
   149  	if len(options) > 0 {
   150  		opts = options[0]
   151  	}
   152  
   153  	if opts.Logger == nil {
   154  		return nil, fmt.Errorf("ethmonitor: logger is nil")
   155  	}
   156  	if opts.Alerter == nil {
   157  		opts.Alerter = util.NoopAlerter()
   158  	}
   159  
   160  	opts.BlockRetentionLimit += opts.TrailNumBlocksBehindHead
   161  
   162  	if opts.DebugLogging {
   163  		stdLogger, ok := opts.Logger.(*logger.StdLogAdapter)
   164  		if ok {
   165  			stdLogger.Level = logger.LogLevel_DEBUG
   166  		}
   167  	}
   168  
   169  	var err error
   170  	var cache cachestore.Store[[]byte]
   171  	if opts.CacheBackend != nil {
   172  		cache, err = cachestorectl.Open[[]byte](opts.CacheBackend, cachestore.WithLockExpiry(5*time.Second))
   173  		if err != nil {
   174  			return nil, fmt.Errorf("ethmonitor: open cache: %w", err)
   175  		}
   176  
   177  		if opts.CacheExpiry == 0 {
   178  			opts.CacheExpiry = 60 * time.Second
   179  		}
   180  	}
   181  
   182  	return &Monitor{
   183  		options:      opts,
   184  		log:          opts.Logger,
   185  		alert:        opts.Alerter,
   186  		provider:     provider,
   187  		chain:        newChain(opts.BlockRetentionLimit, opts.Bootstrap),
   188  		chainID:      nil,
   189  		cache:        cache,
   190  		publishCh:    make(chan Blocks),
   191  		publishQueue: newQueue(opts.BlockRetentionLimit * 2),
   192  		subscribers:  make([]*subscriber, 0),
   193  	}, nil
   194  }
   195  
   196  func (m *Monitor) lazyInit(ctx context.Context) error {
   197  	var err error
   198  	m.chainID, err = getChainID(ctx, m.provider)
   199  	if err != nil {
   200  		return err
   201  	}
   202  	return nil
   203  }
   204  
// Run starts the monitor and blocks until ctx is canceled or a fatal error
// occurs. Only one Run may be active at a time. In Bootstrap mode the chain
// must have been bootstrapped before calling Run. Otherwise the start point
// is, in order of precedence: the existing chain head, StartBlockNumber
// (negative values mean that many blocks behind latest), or the network's
// latest block.
func (m *Monitor) Run(ctx context.Context) error {
	if m.IsRunning() {
		return fmt.Errorf("ethmonitor: already running")
	}

	m.ctx, m.ctxStop = context.WithCancel(ctx)

	atomic.StoreInt32(&m.running, 1)
	defer atomic.StoreInt32(&m.running, 0)

	// resolve the chain id before we begin
	if err := m.lazyInit(ctx); err != nil {
		return err
	}

	// Check if in bootstrap mode -- in which case we expect nextBlockNumber
	// to already be set.
	if m.options.Bootstrap && m.chain.blocks == nil {
		return errors.New("ethmonitor: monitor is in Bootstrap mode, and must be bootstrapped before run")
	}

	// Start from latest, or start from a specific block number
	if m.chain.Head() != nil {
		// starting from last block of our canonical chain
		m.nextBlockNumber = big.NewInt(0).Add(m.chain.Head().Number(), big.NewInt(1))
	} else if m.options.StartBlockNumber != nil {
		if m.options.StartBlockNumber.Cmp(big.NewInt(0)) >= 0 {
			// starting from specific block number
			m.nextBlockNumber = m.options.StartBlockNumber
		} else {
			// starting some number blocks behind the latest block num.
			// best-effort: the lookup error is deliberately ignored; on
			// failure we fall through and start from latest instead
			latestBlock, _ := m.provider.BlockByNumber(m.ctx, nil)
			if latestBlock != nil && latestBlock.Number() != nil {
				m.nextBlockNumber = big.NewInt(0).Add(latestBlock.Number(), m.options.StartBlockNumber)
				if m.nextBlockNumber.Cmp(big.NewInt(0)) < 0 {
					m.nextBlockNumber = nil
				}
			}
		}
	} else {
		// noop, starting from the latest block on the network
	}

	if m.nextBlockNumber == nil {
		m.log.Info("ethmonitor: starting from block=latest")
	} else {
		m.log.Infof("ethmonitor: starting from block=%d", m.nextBlockNumber)
	}

	// Broadcast published events to all subscribers. This goroutine exits
	// when the caller's ctx is canceled.
	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			case blocks := <-m.publishCh:
				if m.options.DebugLogging {
					m.log.Debug("ethmonitor: publishing block", blocks.LatestBlock().NumberU64(), "# events:", len(blocks))
				}

				// broadcast to subscribers
				m.broadcast(blocks)
			}
		}
	}()

	// Monitor the chain for canonical representation
	err := m.monitor()
	if m.options.UnsubscribeOnStop {
		m.UnsubscribeAll(err)
	}
	return err
}
   277  
   278  func (m *Monitor) Stop() {
   279  	m.log.Info("ethmonitor: stop")
   280  	if m.ctxStop != nil {
   281  		m.ctxStop()
   282  	}
   283  	if m.options.UnsubscribeOnStop {
   284  		m.UnsubscribeAll(ErrMonitorStopped)
   285  	}
   286  }
   287  
// IsRunning reports whether the monitor's Run loop is currently active.
func (m *Monitor) IsRunning() bool {
	return atomic.LoadInt32(&m.running) == 1
}
   291  
// Options returns the options the monitor is running with (including the
// BlockRetentionLimit adjustment applied by NewMonitor).
func (m *Monitor) Options() Options {
	return m.options
}
   295  
// Provider returns the underlying RPC provider used by the monitor.
func (m *Monitor) Provider() ethrpc.Interface {
	return m.provider
}
   299  
// listenNewHead returns a channel that signals the monitor loop each time it
// should attempt to fetch the next block. It starts two goroutines tied to
// m.ctx:
//
//  1. A head listener that subscribes to new heads over the provider's
//     streaming transport when available, falling back to polling after
//     StreamingErrNumToSwitchToPolling errors within
//     StreamingErrorResetInterval, and retrying the stream after
//     StreamingRetryAfter.
//  2. A pacing loop that forwards to the returned channel; while the monitor
//     is behind the latest observed head it pushes continuously without
//     waiting for a new head signal.
func (m *Monitor) listenNewHead() <-chan uint64 {
	ch := make(chan uint64)

	// latest head number observed from the stream; 0 when unknown
	// (polling mode, or just after a reconnect)
	var latestHeadBlock atomic.Uint64
	nextBlock := make(chan uint64)

	go func() {
		var streamingErrorCount int
		var streamingErrorLastTime time.Time

	reconnect:
		// reset the latest head block
		latestHeadBlock.Store(0)

		// if we have too many streaming errors, we'll switch to polling
		streamingErrorCount++
		if time.Since(streamingErrorLastTime) > m.options.StreamingErrorResetInterval {
			streamingErrorCount = 0
		}

		// listen for new heads either via streaming or polling
		if m.provider.IsStreamingEnabled() && streamingErrorCount < m.options.StreamingErrNumToSwitchToPolling {
			// Streaming mode if available, where we listen for new heads
			// and push the new block number to the nextBlock channel.
			m.log.Info("ethmonitor: starting stream head listener")

			newHeads := make(chan *types.Header)
			sub, err := m.provider.SubscribeNewHeads(m.ctx, newHeads)
			if err != nil {
				m.log.Warnf("ethmonitor (chain %s): websocket connect failed: %v", m.chainID.String(), err)
				m.alert.Alert(context.Background(), "ethmonitor (chain %s): websocket connect failed: %v", m.chainID.String(), err)
				time.Sleep(2000 * time.Millisecond)

				streamingErrorLastTime = time.Now()
				goto reconnect
			}

			for {
				// staleness guard: if no head arrives within 3x the expected
				// block interval, the subscription is assumed dead and we
				// reconnect
				blockTimer := time.NewTimer(3 * m.options.ExpectedBlockInterval)

				select {
				case <-m.ctx.Done():
					// if we're done, we'll unsubscribe and close the nextBlock channel
					sub.Unsubscribe()
					close(nextBlock)
					blockTimer.Stop()
					return

				case err := <-sub.Err():
					// if we have an error, we'll reconnect
					m.log.Warnf("ethmonitor (chain %s): websocket subscription error: %v", m.chainID.String(), err)
					m.alert.Alert(context.Background(), "ethmonitor (chain %s): websocket subscription error: %v", m.chainID.String(), err)
					sub.Unsubscribe()

					streamingErrorLastTime = time.Now()
					blockTimer.Stop()
					goto reconnect

				case <-blockTimer.C:
					// if we haven't received a new block in a while, we'll reconnect.
					m.log.Warnf("ethmonitor: haven't received block in expected time, reconnecting..")
					sub.Unsubscribe()

					streamingErrorLastTime = time.Now()
					goto reconnect

				case newHead := <-newHeads:
					blockTimer.Stop()

					latestHeadBlock.Store(newHead.Number.Uint64())
					select {
					case nextBlock <- newHead.Number.Uint64():
					default:
						// non-blocking: the pacing loop may not be waiting
					}
				}
			}
		} else {
			// We default to polling if streaming is not enabled
			m.log.Info("ethmonitor: starting poll head listener")

			retryStreamingTimer := time.NewTimer(m.options.StreamingRetryAfter)
			for {
				// if streaming is enabled, we'll retry streaming
				if m.provider.IsStreamingEnabled() {
					select {
					case <-retryStreamingTimer.C:
						// retry streaming; backdate the last-error time so the
						// error count resets on reconnect
						m.log.Info("ethmonitor: retrying streaming...")
						streamingErrorLastTime = time.Now().Add(-m.options.StreamingErrorResetInterval * 2)
						goto reconnect
					default:
						// non-blocking
					}
				}

				// Polling mode, where we poll for the latest block number
				select {
				case <-m.ctx.Done():
					// if we're done, we'll close the nextBlock channel
					close(nextBlock)
					retryStreamingTimer.Stop()
					return

				case <-time.After(time.Duration(m.pollInterval.Load())):
					// 0 means "no known head"; the pacing loop forwards it
					// and the monitor polls for whatever comes next
					nextBlock <- 0
				}
			}
		}
	}()

	// The main loop which notifies the monitor to continue to the next block
	go func() {
		for {
			select {
			case <-m.ctx.Done():
				return
			default:
			}

			var nextBlockNumber uint64
			m.nextBlockNumberMu.Lock()
			if m.nextBlockNumber != nil {
				nextBlockNumber = m.nextBlockNumber.Uint64()
			}
			m.nextBlockNumberMu.Unlock()

			latestBlockNum := latestHeadBlock.Load()
			if nextBlockNumber == 0 || latestBlockNum > nextBlockNumber {
				// monitor is behind, so we just push to keep going without
				// waiting on the nextBlock channel
				ch <- nextBlockNumber
				continue
			} else {
				// wait for the next block
				<-nextBlock
				ch <- latestBlockNum
			}
		}
	}()

	return ch
}
   443  
// monitor is the main run loop: for each head notification it fetches the
// next block, reconciles it into the canonical chain (handling reorgs via
// buildCanonicalChain), attaches logs when WithLogs is set, and publishes the
// resulting events. It returns nil on context cancellation, or a fatal error
// when publishing fails.
func (m *Monitor) monitor() error {
	ctx := m.ctx
	events := Blocks{}

	// minLoopInterval is time we monitor between cycles. It's a fast
	// and fixed amount of time, as the internal method `fetchNextBlock`
	// will actually use the poll interval while searching for the next block.
	minLoopInterval := 5 * time.Millisecond

	// listen for new heads either via streaming or polling
	listenNewHead := m.listenNewHead()

	// monitor run loop
	for {
		select {

		case <-m.ctx.Done():
			return nil

		case newHeadNum := <-listenNewHead:
			// skip stale notifications: if our next block number is already
			// past the reported head there is nothing new to fetch
			m.nextBlockNumberMu.Lock()
			if m.nextBlockNumber != nil && newHeadNum > 0 && m.nextBlockNumber.Uint64() > newHeadNum {
				m.nextBlockNumberMu.Unlock()
				continue
			}
			m.nextBlockNumberMu.Unlock()

			// check if we have a head block, if not, then we set the nextBlockNumber
			headBlock := m.chain.Head()
			if headBlock != nil {
				m.nextBlockNumberMu.Lock()
				m.nextBlockNumber = big.NewInt(0).Add(headBlock.Number(), big.NewInt(1))
				m.nextBlockNumberMu.Unlock()
			}

			// fetch the next block, either via the stream or via a poll
			nextBlock, nextBlockPayload, miss, err := m.fetchNextBlock(ctx)
			if err != nil {
				if errors.Is(err, context.DeadlineExceeded) {
					m.log.Infof("ethmonitor: fetchNextBlock timed out: '%v', for blockNum:%v, retrying..", err, m.nextBlockNumber)
				} else {
					m.log.Warnf("ethmonitor: fetchNextBlock error reported '%v', for blockNum:%v, retrying..", err, m.nextBlockNumber)
				}

				// pause, then retry
				time.Sleep(m.options.PollingInterval)
				continue
			}

			// if we hit a miss between calls, then we reset the pollInterval, otherwise
			// we speed up the polling interval
			if miss {
				m.pollInterval.Store(int64(m.options.PollingInterval))
			} else {
				m.pollInterval.Store(int64(clampDuration(minLoopInterval, time.Duration(m.pollInterval.Load())/4)))
			}

			// build deterministic set of add/remove events which construct the canonical chain
			events, err = m.buildCanonicalChain(ctx, nextBlock, nextBlockPayload, events)
			if err != nil {
				m.log.Warnf("ethmonitor: error reported '%v', failed to build chain for next blockNum:%d blockHash:%s, retrying..",
					err, nextBlock.NumberU64(), nextBlock.Hash().Hex())

				// pause, then retry
				time.Sleep(m.options.PollingInterval)
				continue
			}

			// attach (or clear) logs while holding the chain lock so the
			// backfill pass sees a consistent view of the retained blocks
			m.chain.mu.Lock()
			if m.options.WithLogs {
				m.addLogs(ctx, events)
				m.backfillChainLogs(ctx, events)
			} else {
				for _, b := range events {
					b.Logs = nil // nil it out to be clear to subscribers
					b.OK = true
				}
			}
			m.chain.mu.Unlock()

			// publish events
			err = m.publish(ctx, events)
			if err != nil {
				// failing to publish is considered a rare, but fatal error.
				// the only time this happens is if we fail to push an event to the publish queue.
				return superr.New(ErrFatal, err)
			}

			// clear events sink
			events = Blocks{}
		}
	}
}
   538  
   539  func (m *Monitor) buildCanonicalChain(ctx context.Context, nextBlock *types.Block, nextBlockPayload []byte, events Blocks) (Blocks, error) {
   540  	select {
   541  	case <-ctx.Done():
   542  		return nil, ctx.Err()
   543  	default:
   544  	}
   545  
   546  	headBlock := m.chain.Head()
   547  
   548  	m.log.Debugf("ethmonitor: new block #%d hash:%s prevHash:%s numTxns:%d",
   549  		nextBlock.NumberU64(), nextBlock.Hash().String(), nextBlock.ParentHash().String(), len(nextBlock.Transactions()))
   550  
   551  	if headBlock == nil || nextBlock.ParentHash() == headBlock.Hash() {
   552  		// block-chaining it up
   553  		block := &Block{Event: Added, Block: nextBlock, BlockPayload: m.setPayload(nextBlockPayload)}
   554  		events = append(events, block)
   555  		return events, m.chain.push(block)
   556  	}
   557  
   558  	// next block doest match prevHash, therefore we must pop our previous block and recursively
   559  	// rebuild the canonical chain
   560  	poppedBlock := *m.chain.pop() // assign by value so it won't be mutated later
   561  	poppedBlock.Event = Removed
   562  	poppedBlock.OK = true // removed blocks are ready
   563  
   564  	// purge the block num from the cache
   565  	if m.cache != nil {
   566  		key := cacheKeyBlockNum(m.chainID, poppedBlock.Number())
   567  		err := m.cache.Delete(ctx, key)
   568  		if err != nil {
   569  			m.log.Warnf("ethmonitor: error deleting block cache for block num %d due to: '%v'", err, poppedBlock.Number().Uint64())
   570  		}
   571  	}
   572  
   573  	m.log.Debugf("ethmonitor: block reorg, reverting block #%d hash:%s prevHash:%s", poppedBlock.NumberU64(), poppedBlock.Hash().Hex(), poppedBlock.ParentHash().Hex())
   574  	events = append(events, &poppedBlock)
   575  
   576  	// let's always take a pause between any reorg for the polling interval time
   577  	// to allow nodes to sync to the correct chain
   578  	pause := calc.Max(2*m.options.PollingInterval, 2*time.Second)
   579  	time.Sleep(pause)
   580  
   581  	// Fetch/connect the broken chain backwards by traversing recursively via parent hashes
   582  	nextParentBlock, nextParentBlockPayload, err := m.fetchBlockByHash(ctx, nextBlock.ParentHash())
   583  	if err != nil {
   584  		// NOTE: this is okay, it will auto-retry
   585  		return events, err
   586  	}
   587  
   588  	events, err = m.buildCanonicalChain(ctx, nextParentBlock, nextParentBlockPayload, events)
   589  	if err != nil {
   590  		// NOTE: this is okay, it will auto-retry
   591  		return events, err
   592  	}
   593  
   594  	block := &Block{Event: Added, Block: nextBlock, BlockPayload: m.setPayload(nextBlockPayload)}
   595  	err = m.chain.push(block)
   596  	if err != nil {
   597  		return events, err
   598  	}
   599  	events = append(events, block)
   600  
   601  	return events, nil
   602  }
   603  
// addLogs fetches the logs for each block in blocks (filtered by the
// configured LogTopics) and attaches them, marking a block OK once its logs
// are settled. Removed (reorged) blocks are marked OK without fetching, as
// their logs may never be available from a node. A block whose getLogs call
// fails (or whose bloom says logs exist but none were returned) is left with
// OK=false so backfillChainLogs retries it later. All fetches in one call
// share a single timeout of m.options.Timeout.
func (m *Monitor) addLogs(ctx context.Context, blocks Blocks) {
	tctx, cancel := context.WithTimeout(ctx, m.options.Timeout)
	defer cancel()

	for _, block := range blocks {
		select {
		case <-ctx.Done():
			return
		default:
		}

		// skip, we already have logs for this block or its a removed block
		if block.OK {
			continue
		}

		// do not attempt to get logs for re-org'd blocks as the data
		// will be inconsistent and may never be available.
		if block.Event == Removed {
			block.OK = true
			continue
		}

		blockHash := block.Hash()

		topics := [][]common.Hash{}
		if len(m.options.LogTopics) > 0 {
			topics = append(topics, m.options.LogTopics)
		}

		logs, logsPayload, err := m.filterLogs(tctx, blockHash, topics)

		if err == nil {
			// check the logsBloom from the block to check if we should be expecting logs. logsBloom
			// will be included for any indexed logs.
			if len(logs) > 0 || block.Bloom() == (types.Bloom{}) {
				// successful backfill
				if logs == nil {
					block.Logs = []types.Log{}
				} else {
					block.Logs = logs
				}
				block.LogsPayload = m.setPayload(logsPayload)
				block.OK = true
				continue
			}
		}

		// mark for backfilling
		block.Logs = nil
		block.OK = false

		// NOTE: we do not error here as these logs will be backfilled before they are published anyways,
		// but we log the error anyways. (err may be nil here when the bloom
		// indicated logs but the node returned none.)
		m.log.Infof("ethmonitor: [getLogs failed -- marking block %s for log backfilling] %v", blockHash.Hex(), err)
	}
}
   661  
// filterLogs fetches the logs for blockHash matching topics, returning the
// decoded logs along with the raw JSON payload. When a cache is configured,
// the result is stored under a key derived from the chain id, block hash and
// an xxhash digest of the topics, and fetched under a distributed lock so
// concurrent monitor instances share the work; otherwise the node is queried
// directly. Note the raw payload is returned even when err is non-nil.
func (m *Monitor) filterLogs(ctx context.Context, blockHash common.Hash, topics [][]common.Hash) ([]types.Log, []byte, error) {
	// origin fetch, used directly or by the cache on a miss
	getter := func(ctx context.Context, _ string) ([]byte, error) {
		m.log.Debugf("ethmonitor: filterLogs is calling origin for block hash %s", blockHash)

		tctx, cancel := context.WithTimeout(ctx, 4*time.Second)
		defer cancel()

		logsPayload, err := m.provider.RawFilterLogs(tctx, ethereum.FilterQuery{
			BlockHash: &blockHash,
			Topics:    topics,
		})
		return logsPayload, err
	}

	if m.cache == nil {
		resp, err := getter(ctx, "")
		if err != nil {
			return nil, resp, err
		}
		logs, err := unmarshalLogs(resp)
		return logs, resp, err
	}

	// digest the topic filter so distinct filters get distinct cache keys
	topicsDigest := xxhash.New()
	for _, hashes := range topics {
		for _, hash := range hashes {
			topicsDigest.Write(hash.Bytes())
		}
		topicsDigest.Write([]byte{'\n'})
	}

	key := fmt.Sprintf("ethmonitor:%s:Logs:hash=%s;topics=%d", m.chainID.String(), blockHash.String(), topicsDigest.Sum64())
	resp, err := m.cache.GetOrSetWithLockEx(ctx, key, getter, m.options.CacheExpiry)
	if err != nil {
		return nil, resp, err
	}
	logs, err := unmarshalLogs(resp)
	return logs, resp, err
}
   701  
// backfillChainLogs re-attempts log fetching for any retained block whose
// earlier getLogs call failed (OK=false), skipping blocks that were just
// handled in this cycle (present in newBlocks).
//
// NOTE: the caller (monitor) holds m.chain.mu while calling this, so reading
// m.chain.blocks directly here is safe.
func (m *Monitor) backfillChainLogs(ctx context.Context, newBlocks Blocks) {
	// Backfill logs for failed getLog calls across the retained chain.

	// In cases of re-orgs and inconsistencies with node state, in certain cases
	// we have to backfill log fetching and send an updated block event to subscribers.

	// We start by looking through our entire blocks retention for addLogs failed
	// and attempt to fetch the logs again for the same block object.
	//
	// NOTE: we only back-fill 'Added' blocks, as any 'Removed' blocks could be reverted
	// and their logs will never be available from a node.
	blocks := m.chain.blocks

	// iterate newest to oldest
	for i := len(blocks) - 1; i >= 0; i-- {
		select {
		case <-ctx.Done():
			return
		default:
		}

		// check if this was a recently added block in the same cycle to avoid
		// making extra backfill calls which just happened before call to backfillChainLogs(..)
		if len(newBlocks) > 0 {
			_, ok := newBlocks.FindBlock(blocks[i].Hash())
			if ok {
				continue
			}
		}

		// attempt to backfill if necessary
		if !blocks[i].OK {
			m.addLogs(ctx, Blocks{blocks[i]})
			if blocks[i].Event == Added && blocks[i].OK {
				m.log.Infof("ethmonitor: [getLogs backfill successful for block:%d %s]", blocks[i].NumberU64(), blocks[i].Hash().Hex())
			}
		}
	}
}
   740  
   741  func (m *Monitor) fetchNextBlock(ctx context.Context) (*types.Block, []byte, bool, error) {
   742  	miss := false
   743  
   744  	getter := func(ctx context.Context, _ string) ([]byte, error) {
   745  		m.log.Debugf("ethmonitor: fetchNextBlock is calling origin for number %s", m.nextBlockNumber)
   746  		for {
   747  			select {
   748  			case <-ctx.Done():
   749  				return nil, ctx.Err()
   750  			default:
   751  			}
   752  
   753  			nextBlockPayload, err := m.fetchRawBlockByNumber(ctx, m.nextBlockNumber)
   754  			if errors.Is(err, ethereum.NotFound) {
   755  				miss = true
   756  				if m.provider.IsStreamingEnabled() {
   757  					// in streaming mode, we'll use a shorter time to pause before we refetch
   758  					time.Sleep(200 * time.Millisecond)
   759  				} else {
   760  					time.Sleep(m.options.PollingInterval)
   761  				}
   762  				continue
   763  			}
   764  			if err != nil {
   765  				m.log.Warnf("ethmonitor: [retrying] failed to fetch next block # %d, due to: %v", m.nextBlockNumber, err)
   766  				miss = true
   767  				time.Sleep(m.options.PollingInterval)
   768  				continue
   769  			}
   770  
   771  			return nextBlockPayload, nil
   772  		}
   773  	}
   774  
   775  	var nextBlockNumber *big.Int
   776  	m.nextBlockNumberMu.Lock()
   777  	if m.nextBlockNumber != nil {
   778  		nextBlockNumber = big.NewInt(0).Set(m.nextBlockNumber)
   779  	}
   780  	m.nextBlockNumberMu.Unlock()
   781  
   782  	// skip cache if isn't provided, or in case when nextBlockNumber is nil (latest)
   783  	if m.cache == nil || nextBlockNumber == nil {
   784  		resp, err := getter(ctx, "")
   785  		if err != nil {
   786  			return nil, resp, miss, err
   787  		}
   788  		block, err := unmarshalBlock(resp)
   789  		return block, resp, miss, err
   790  	}
   791  
   792  	// fetch with distributed mutex
   793  	key := cacheKeyBlockNum(m.chainID, nextBlockNumber)
   794  	resp, err := m.cache.GetOrSetWithLockEx(ctx, key, getter, m.options.CacheExpiry)
   795  	if err != nil {
   796  		return nil, resp, miss, err
   797  	}
   798  	block, err := unmarshalBlock(resp)
   799  	return block, resp, miss, err
   800  }
   801  
   802  func cacheKeyBlockNum(chainID *big.Int, num *big.Int) string {
   803  	return fmt.Sprintf("ethmonitor:%s:BlockNum:%s", chainID.String(), num.String())
   804  }
   805  
   806  func (m *Monitor) fetchRawBlockByNumber(ctx context.Context, num *big.Int) ([]byte, error) {
   807  	m.log.Debugf("ethmonitor: fetchRawBlockByNumber is calling origin for number %s", num)
   808  	maxErrAttempts, errAttempts := 3, 0 // quick retry in case of short-term node connection failures
   809  
   810  	var blockPayload []byte
   811  	var err error
   812  
   813  	for {
   814  		select {
   815  		case <-ctx.Done():
   816  			return nil, ctx.Err()
   817  		default:
   818  		}
   819  
   820  		if errAttempts >= maxErrAttempts {
   821  			m.log.Warnf("ethmonitor: fetchBlockByNumber hit maxErrAttempts after %d tries for block num %v due to %v", errAttempts, num, err)
   822  			return nil, superr.New(ErrMaxAttempts, err)
   823  		}
   824  
   825  		tctx, cancel := context.WithTimeout(ctx, m.options.Timeout)
   826  		defer cancel()
   827  
   828  		blockPayload, err = m.provider.RawBlockByNumber(tctx, num)
   829  		if err != nil {
   830  			if errors.Is(err, ethereum.NotFound) {
   831  				return nil, ethereum.NotFound
   832  			} else {
   833  				m.log.Warnf("ethmonitor: fetchBlockByNumber failed due to: %v", err)
   834  				errAttempts++
   835  				time.Sleep(time.Duration(errAttempts) * time.Second)
   836  				continue
   837  			}
   838  		}
   839  		return blockPayload, nil
   840  	}
   841  }
   842  
// fetchBlockByHash fetches a block by its hash from the provider, retrying
// both "not found" (node still syncing) and transient errors a bounded number
// of times. When a cachestore is configured, concurrent fetches for the same
// hash are de-duplicated across processes via GetOrSetWithLockEx. Returns the
// decoded block, the raw json payload (cache path only — see note below), and
// an error.
func (m *Monitor) fetchBlockByHash(ctx context.Context, hash common.Hash) (*types.Block, []byte, error) {
	getter := func(ctx context.Context, _ string) ([]byte, error) {
		m.log.Debugf("ethmonitor: fetchBlockByHash is calling origin for hash %s", hash)

		maxNotFoundAttempts, notFoundAttempts := 2, 0 // waiting for node to sync
		maxErrAttempts, errAttempts := 2, 0           // quick retry in case of short-term node connection failures

		var blockPayload []byte
		var err error

		for {
			// bail out promptly if the caller's context is done
			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			default:
			}

			if notFoundAttempts >= maxNotFoundAttempts {
				return nil, ethereum.NotFound
			}
			if errAttempts >= maxErrAttempts {
				m.log.Warnf("ethmonitor: fetchBlockByHash hit maxErrAttempts after %d tries for block hash %s due to %v", errAttempts, hash.Hex(), err)
				return nil, superr.New(ErrMaxAttempts, err)
			}

			// NOTE(review): unlike fetchRawBlockByNumber, attempts here are not
			// bounded by m.options.Timeout — only ctx cancellation applies.
			blockPayload, err = m.provider.RawBlockByHash(ctx, hash)
			if err != nil {
				if errors.Is(err, ethereum.NotFound) {
					// node may still be syncing: back off and retry
					notFoundAttempts++
					time.Sleep(time.Duration(notFoundAttempts) * time.Second)
					continue
				} else {
					errAttempts++
					time.Sleep(time.Duration(errAttempts) * time.Second)
					continue
				}
			}
			// NOTE(review): an empty payload with a nil error loops again without
			// incrementing either attempt counter — if the provider repeatedly
			// returns (nil, nil) this spins until ctx is done. Confirm providers
			// never return an empty payload without an error.
			if len(blockPayload) > 0 {
				return blockPayload, nil
			}
		}
	}

	// skip if cache isn't provided
	if m.cache == nil {
		resp, err := getter(ctx, "")
		if err != nil {
			return nil, nil, err
		}
		block, err := unmarshalBlock(resp)
		// NOTE(review): the raw payload is dropped (nil) on this path, while the
		// cached path below returns it — confirm callers don't rely on the raw
		// payload when running without a cache.
		return block, nil, err
	}

	// fetch with distributed mutex
	key := fmt.Sprintf("ethmonitor:%s:BlockHash:%s", m.chainID.String(), hash.String())
	resp, err := m.cache.GetOrSetWithLockEx(ctx, key, getter, m.options.CacheExpiry)
	if err != nil {
		return nil, nil, err
	}
	block, err := unmarshalBlock(resp)
	return block, resp, err
}
   905  
// publish enqueues the given block events on the publish queue and then
// dequeues/sends any events which are ready for delivery to subscribers.
// When the TrailNumBlocksBehindHead option is set, a maxBlockNum cap is
// computed so events are held back behind the head; 0 is passed when
// trailing is disabled (semantics of 0 are decided by publishQueue.dequeue).
func (m *Monitor) publish(ctx context.Context, events Blocks) error {
	// skip publish enqueuing if there are no subscribers
	m.mu.Lock()
	if len(m.subscribers) == 0 {
		m.mu.Unlock()
		return nil
	}
	m.mu.Unlock()

	// Check for trail-behind-head mode and set maxBlockNum if applicable
	// NOTE(review): if the head block number is still smaller than
	// TrailNumBlocksBehindHead, this subtraction underflows uint64 and yields
	// a huge maxBlockNum; and LatestBlock() returning nil would panic on
	// NumberU64(). Confirm publish is only called once the chain has a head
	// past the trailing window.
	maxBlockNum := uint64(0)
	if m.options.TrailNumBlocksBehindHead > 0 {
		maxBlockNum = m.LatestBlock().NumberU64() - uint64(m.options.TrailNumBlocksBehindHead)
	}

	// Enqueue
	err := m.publishQueue.enqueue(events)
	if err != nil {
		return err
	}

	// Publish events existing in the queue
	pubEvents, ok := m.publishQueue.dequeue(maxBlockNum)
	if ok {
		m.publishCh <- pubEvents
	}

	return nil
}
   935  
   936  func (m *Monitor) broadcast(events Blocks) {
   937  	m.mu.Lock()
   938  	defer m.mu.Unlock()
   939  
   940  	for _, sub := range m.subscribers {
   941  		sub.ch.Send(events)
   942  	}
   943  }
   944  
   945  func (m *Monitor) Subscribe(optLabel ...string) Subscription {
   946  	m.mu.Lock()
   947  	defer m.mu.Unlock()
   948  
   949  	var label string
   950  	if len(optLabel) > 0 {
   951  		label = optLabel[0]
   952  	}
   953  
   954  	subscriber := &subscriber{
   955  		ch: channel.NewUnboundedChan[Blocks](10, 5000, channel.Options{
   956  			Logger:  m.log,
   957  			Alerter: m.alert,
   958  			Label:   label,
   959  		}),
   960  		done: make(chan struct{}),
   961  	}
   962  
   963  	subscriber.unsubscribe = func() {
   964  		close(subscriber.done)
   965  		subscriber.ch.Close()
   966  		subscriber.ch.Flush()
   967  
   968  		m.mu.Lock()
   969  		defer m.mu.Unlock()
   970  
   971  		for i, sub := range m.subscribers {
   972  			if sub == subscriber {
   973  				m.subscribers = append(m.subscribers[:i], m.subscribers[i+1:]...)
   974  				return
   975  			}
   976  		}
   977  	}
   978  
   979  	m.subscribers = append(m.subscribers, subscriber)
   980  
   981  	return subscriber
   982  }
   983  
// Chain returns the monitor's retained canonical chain of blocks.
func (m *Monitor) Chain() *Chain {
	return m.chain
}
   987  
// LatestBlock will return the head block of the canonical chain,
// or nil if no blocks have been monitored yet.
func (m *Monitor) LatestBlock() *Block {
	return m.chain.Head()
}
   992  
   993  // LatestBlockNum returns the latest block number in the canonical chain
   994  func (m *Monitor) LatestBlockNum() *big.Int {
   995  	latestBlock := m.LatestBlock()
   996  	if latestBlock == nil {
   997  		return big.NewInt(0)
   998  	} else {
   999  		return big.NewInt(0).Set(latestBlock.Number())
  1000  	}
  1001  }
  1002  
// LatestReadyBlock returns the latest block in the canonical chain
// which has block.OK state to true, as in all details are available for the block.
func (m *Monitor) LatestReadyBlock() *Block {
	return m.chain.ReadyHead()
}
  1008  
  1009  // LatestReadyBlockNum returns the latest block number in the canonical chain
  1010  // which has block.OK state to true, as in all details are available for the block.
  1011  func (m *Monitor) LatestReadyBlockNum() *big.Int {
  1012  	latestBlock := m.LatestReadyBlock()
  1013  	if latestBlock == nil {
  1014  		return big.NewInt(0)
  1015  	} else {
  1016  		return big.NewInt(0).Set(latestBlock.Number())
  1017  	}
  1018  }
  1019  
  1020  // LatestFinalBlock returns the latest block which has reached finality.
  1021  // The argument `numBlocksToFinality` should be a constant value of the number
  1022  // of blocks a particular chain needs to reach finality. Ie. on Polygon this
  1023  // value would be 120 and on Ethereum it would be 20. As the pubsub system
  1024  // publishes new blocks, this value will change, as the chain will progress
  1025  // forward. It's recommend / safe to call this method each time in a <-sub.Blocks()
  1026  // code block.
  1027  func (m *Monitor) LatestFinalBlock(numBlocksToFinality int) *Block {
  1028  	m.chain.mu.Lock()
  1029  	defer m.chain.mu.Unlock()
  1030  
  1031  	n := len(m.chain.blocks)
  1032  	if n < numBlocksToFinality+1 {
  1033  		// not enough blocks have been monitored yet
  1034  		return nil
  1035  	} else {
  1036  		// return the block at finality position from the canonical chain
  1037  		return m.chain.blocks[n-numBlocksToFinality-1]
  1038  	}
  1039  }
  1040  
  1041  func (m *Monitor) OldestBlockNum() *big.Int {
  1042  	oldestBlock := m.chain.Tail()
  1043  	if oldestBlock == nil {
  1044  		return big.NewInt(0)
  1045  	} else {
  1046  		return big.NewInt(0).Set(oldestBlock.Number())
  1047  	}
  1048  }
  1049  
// GetBlock will search the retained blocks for the hash,
// returning nil if the block is not found.
func (m *Monitor) GetBlock(blockHash common.Hash) *Block {
	return m.chain.GetBlock(blockHash)
}
  1054  
// GetTransaction will search within the retained canonical chain for the txn
// hash, returning the transaction together with the event it was last seen
// with. (The previous comment was copied from GetBlock and referenced a
// nonexistent `optMined` parameter.)
func (m *Monitor) GetTransaction(txnHash common.Hash) (*types.Transaction, Event) {
	return m.chain.GetTransaction(txnHash)
}
  1060  
// GetAverageBlockTime returns the average block time in seconds (including fractions).
func (m *Monitor) GetAverageBlockTime() float64 {
	return m.chain.GetAverageBlockTime()
}
  1065  
// NumSubscribers returns the current number of active subscribers.
func (m *Monitor) NumSubscribers() int {
	m.mu.Lock()
	defer m.mu.Unlock()
	return len(m.subscribers)
}
  1071  
  1072  func (m *Monitor) UnsubscribeAll(err error) {
  1073  	m.mu.Lock()
  1074  	var subs []*subscriber
  1075  	subs = append(subs, m.subscribers...)
  1076  	m.mu.Unlock()
  1077  
  1078  	for _, sub := range subs {
  1079  		sub.err = err
  1080  		sub.Unsubscribe()
  1081  	}
  1082  }
  1083  
  1084  // PurgeHistory clears all but the head of the chain. Useful for tests, but should almost
  1085  // never be used in a normal application.
  1086  func (m *Monitor) PurgeHistory() {
  1087  	m.mu.Lock()
  1088  	defer m.mu.Unlock()
  1089  	if len(m.chain.blocks) > 1 {
  1090  		m.chain.mu.Lock()
  1091  		defer m.chain.mu.Unlock()
  1092  		m.chain.blocks = m.chain.blocks[1:1]
  1093  	}
  1094  }
  1095  
  1096  func (m *Monitor) setPayload(value []byte) []byte {
  1097  	if m.options.RetainPayloads {
  1098  		return value
  1099  	} else {
  1100  		return nil
  1101  	}
  1102  }
  1103  
  1104  func getChainID(ctx context.Context, provider ethrpc.Interface) (*big.Int, error) {
  1105  	var chainID *big.Int
  1106  	err := breaker.Do(ctx, func() error {
  1107  		ctx, cancel := context.WithTimeout(ctx, 4*time.Second)
  1108  		defer cancel()
  1109  
  1110  		id, err := provider.ChainID(ctx)
  1111  		if err != nil {
  1112  			return err
  1113  		}
  1114  		chainID = id
  1115  		return nil
  1116  	}, nil, 1*time.Second, 2, 3)
  1117  
  1118  	if err != nil {
  1119  		return nil, err
  1120  	}
  1121  
  1122  	return chainID, nil
  1123  }
  1124  
  1125  func clampDuration(x, y time.Duration) time.Duration {
  1126  	if x > y {
  1127  		return x
  1128  	} else {
  1129  		return y
  1130  	}
  1131  }
  1132  
  1133  func unmarshalBlock(blockPayload []byte) (*types.Block, error) {
  1134  	var block *types.Block
  1135  	err := ethrpc.IntoBlock(blockPayload, &block)
  1136  	if err != nil {
  1137  		return nil, err
  1138  	}
  1139  	return block, nil
  1140  }
  1141  
  1142  func unmarshalLogs(logsPayload []byte) ([]types.Log, error) {
  1143  	var logs []types.Log
  1144  	err := json.Unmarshal(logsPayload, &logs)
  1145  	if err != nil {
  1146  		return nil, err
  1147  	}
  1148  	return logs, nil
  1149  }