github.com/ethereum-optimism/optimism@v1.7.2/packages/chain-mon/src/wd-mon/service.ts (about)

     1  import {
     2    BaseServiceV2,
     3    StandardOptions,
     4    ExpressRouter,
     5    Gauge,
     6    validators,
     7    waitForProvider,
     8  } from '@eth-optimism/common-ts'
     9  import { getOEContract, DEFAULT_L2_CONTRACT_ADDRESSES } from '@eth-optimism/sdk'
    10  import { getChainId, sleep } from '@eth-optimism/core-utils'
    11  import { Provider } from '@ethersproject/abstract-provider'
    12  import { ethers } from 'ethers'
    13  import dateformat from 'dateformat'
    14  
    15  import { version } from '../../package.json'
    16  import { DEFAULT_STARTING_BLOCK_NUMBERS } from './constants'
    17  
    18  type Options = {
    19    l1RpcProvider: Provider
    20    l2RpcProvider: Provider
    21    optimismPortalAddress: string
    22    l2ToL1MessagePasserAddress: string
    23    startBlockNumber: number
    24    eventBlockRange: number
    25    sleepTimeMs: number
    26  }
    27  
    28  type Metrics = {
    29    highestBlockNumber: Gauge
    30    withdrawalsValidated: Gauge
    31    isDetectingForgeries: Gauge
    32    nodeConnectionFailures: Gauge
    33  }
    34  
    35  type State = {
    36    portal: ethers.Contract
    37    messenger: ethers.Contract
    38    highestUncheckedBlockNumber: number
    39    faultProofWindow: number
    40    forgeryDetected: boolean
    41  }
    42  
    43  export class WithdrawalMonitor extends BaseServiceV2<Options, Metrics, State> {
    44    constructor(options?: Partial<Options & StandardOptions>) {
    45      super({
    46        version,
    47        name: 'two-step-monitor',
    48        loop: true,
    49        options: {
    50          loopIntervalMs: 1000,
    51          ...options,
    52        },
    53        optionsSpec: {
    54          l1RpcProvider: {
    55            validator: validators.provider,
    56            desc: 'Provider for interacting with L1',
    57          },
    58          l2RpcProvider: {
    59            validator: validators.provider,
    60            desc: 'Provider for interacting with L2',
    61          },
    62          optimismPortalAddress: {
    63            validator: validators.address,
    64            default: null,
    65            desc: 'Address of the OptimismPortal proxy contract on L1',
    66            public: true,
    67          },
    68          l2ToL1MessagePasserAddress: {
    69            validator: validators.address,
    70            default: DEFAULT_L2_CONTRACT_ADDRESSES.BedrockMessagePasser as string,
    71            desc: 'Address of the L2ToL1MessagePasser contract on L2',
    72            public: true,
    73          },
    74          startBlockNumber: {
    75            validator: validators.num,
    76            default: -1,
    77            desc: 'L1 block number to start checking from',
    78            public: true,
    79          },
    80          eventBlockRange: {
    81            validator: validators.num,
    82            default: 2000,
    83            desc: 'Number of blocks to query for events over per loop',
    84            public: true,
    85          },
    86          sleepTimeMs: {
    87            validator: validators.num,
    88            default: 15000,
    89            desc: 'Time in ms to sleep when waiting for a node',
    90            public: true,
    91          },
    92        },
    93        metricsSpec: {
    94          highestBlockNumber: {
    95            type: Gauge,
    96            desc: 'Highest block number (checked and known)',
    97            labels: ['type'],
    98          },
    99          withdrawalsValidated: {
   100            type: Gauge,
   101            desc: 'Latest L1 Block (checked and known)',
   102            labels: ['type'],
   103          },
   104          isDetectingForgeries: {
   105            type: Gauge,
   106            desc: '0 if state is ok. 1 or more if forged withdrawals are detected.',
   107          },
   108          nodeConnectionFailures: {
   109            type: Gauge,
   110            desc: 'Number of times node connection has failed',
   111            labels: ['layer', 'section'],
   112          },
   113        },
   114      })
   115    }
   116  
   117    async init(): Promise<void> {
   118      // Connect to L1.
   119      await waitForProvider(this.options.l1RpcProvider, {
   120        logger: this.logger,
   121        name: 'L1',
   122      })
   123  
   124      // Connect to L2.
   125      await waitForProvider(this.options.l2RpcProvider, {
   126        logger: this.logger,
   127        name: 'L2',
   128      })
   129  
   130      // Need L2 chain ID to resolve contract addresses.
   131      const l2ChainId = await getChainId(this.options.l2RpcProvider)
   132  
   133      // Create the OptimismPortal contract instance. If the optimismPortal option is not provided
   134      // then the SDK will attempt to resolve the address automatically based on the L2 chain ID. If
   135      // the SDK isn't aware of the L2 chain ID then it will throw an error that makes it clear the
   136      // user needs to provide this value explicitly.
   137      this.state.portal = getOEContract('OptimismPortal', l2ChainId, {
   138        signerOrProvider: this.options.l1RpcProvider,
   139        address: this.options.optimismPortalAddress,
   140      })
   141  
   142      // Create the L2ToL1MessagePasser contract instance. If the l2ToL1MessagePasser option is not
   143      // provided then we'll use the default address which typically should be correct. It's very
   144      // unlikely that any user would change this address so this should work in 99% of cases. If we
   145      // really wanted to be extra safe we could do some sanity checks to make sure the contract has
   146      // the interface we need but doesn't seem important for now.
   147      this.state.messenger = getOEContract('L2ToL1MessagePasser', l2ChainId, {
   148        signerOrProvider: this.options.l2RpcProvider,
   149        address: this.options.l2ToL1MessagePasserAddress,
   150      })
   151  
   152      // Previous versions of wd-mon would try to pick the starting block number automatically but
   153      // this had the possibility of missing certain withdrawals if the service was restarted at the
   154      // wrong time. Given the added complexity of finding a starting point automatically after FPAC,
   155      // it's much easier to simply start a fixed block number than trying to do something fancy. Use
   156      // the default configured in this service or use zero if no default is defined.
   157      this.state.highestUncheckedBlockNumber = this.options.startBlockNumber
   158      if (this.options.startBlockNumber === -1) {
   159        this.state.highestUncheckedBlockNumber =
   160          DEFAULT_STARTING_BLOCK_NUMBERS[l2ChainId] || 0
   161      }
   162  
   163      // Default state is that forgeries have not been detected.
   164      this.state.forgeryDetected = false
   165    }
   166  
   167    // K8s healthcheck
   168    async routes(router: ExpressRouter): Promise<void> {
   169      router.get('/healthz', async (req, res) => {
   170        return res.status(200).json({
   171          ok: !this.state.forgeryDetected,
   172        })
   173      })
   174    }
   175  
   176    async main(): Promise<void> {
   177      // Get the latest L1 block number.
   178      let latestL1BlockNumber: number
   179      try {
   180        latestL1BlockNumber = await this.options.l1RpcProvider.getBlockNumber()
   181      } catch (err) {
   182        // Log the issue so we can debug it.
   183        this.logger.error(`got error when connecting to node`, {
   184          error: err,
   185          node: 'l1',
   186          section: 'getBlockNumber',
   187        })
   188  
   189        // Increment the metric so we can detect the issue.
   190        this.metrics.nodeConnectionFailures.inc({
   191          layer: 'l1',
   192          section: 'getBlockNumber',
   193        })
   194  
   195        // Sleep for a little to give intermittent errors a chance to recover.
   196        return sleep(this.options.sleepTimeMs)
   197      }
   198  
   199      // Update highest block number metrics so we can keep track of how the service is doing.
   200      this.metrics.highestBlockNumber.set({ type: 'known' }, latestL1BlockNumber)
   201      this.metrics.highestBlockNumber.set(
   202        { type: 'checked' },
   203        this.state.highestUncheckedBlockNumber
   204      )
   205  
   206      // Check if the RPC provider is behind us for some reason. Can happen occasionally,
   207      // particularly if connected to an RPC provider that load balances over multiple nodes that
   208      // might not be perfectly in sync.
   209      if (latestL1BlockNumber <= this.state.highestUncheckedBlockNumber) {
   210        // Sleep for a little to give the RPC a chance to catch up.
   211        return sleep(this.options.sleepTimeMs)
   212      }
   213  
   214      // Generally better to use a relatively small block range because it means this service can be
   215      // used alongside many different types of L1 nodes. For instance, Geth will typically only
   216      // support a block range of 2000 blocks out of the box.
   217      const toBlockNumber = Math.min(
   218        this.state.highestUncheckedBlockNumber + this.options.eventBlockRange,
   219        latestL1BlockNumber
   220      )
   221  
   222      // Useful to log this stuff just in case we get stuck or something.
   223      this.logger.info(`checking recent blocks`, {
   224        fromBlockNumber: this.state.highestUncheckedBlockNumber,
   225        toBlockNumber,
   226      })
   227  
   228      // Query for WithdrawalProven events within the specified block range.
   229      let events: ethers.Event[]
   230      try {
   231        events = await this.state.portal.queryFilter(
   232          this.state.portal.filters.WithdrawalProven(),
   233          this.state.highestUncheckedBlockNumber,
   234          toBlockNumber
   235        )
   236      } catch (err) {
   237        // Log the issue so we can debug it.
   238        this.logger.error(`got error when connecting to node`, {
   239          error: err,
   240          node: 'l1',
   241          section: 'querying for WithdrawalProven events',
   242        })
   243  
   244        // Increment the metric so we can detect the issue.
   245        this.metrics.nodeConnectionFailures.inc({
   246          layer: 'l1',
   247          section: 'querying for WithdrawalProven events',
   248        })
   249  
   250        // Sleep for a little to give intermittent errors a chance to recover.
   251        return sleep(this.options.sleepTimeMs)
   252      }
   253  
   254      // Go over all the events and check if the withdrawal hash actually exists on L2.
   255      for (const event of events) {
   256        // Could consider using multicall here but this is efficient enough for now.
   257        const hash = event.args.withdrawalHash
   258        const exists = await this.state.messenger.sentMessages(hash)
   259  
   260        // Hopefully the withdrawal exists!
   261        if (exists) {
   262          // Unlike below we don't grab the timestamp here because it adds an unnecessary request.
   263          this.logger.info(`valid withdrawal`, {
   264            withdrawalHash: event.args.withdrawalHash,
   265          })
   266  
   267          // Bump the withdrawals metric so we can keep track.
   268          this.metrics.withdrawalsValidated.inc()
   269        } else {
   270          // Grab and format the timestamp so it's clear how much time is left.
   271          const block = await event.getBlock()
   272          const ts = `${dateformat(
   273            new Date(block.timestamp * 1000),
   274            'mmmm dS, yyyy, h:MM:ss TT',
   275            true
   276          )} UTC`
   277  
   278          // Uh oh!
   279          this.logger.error(`withdrawalHash not seen on L2`, {
   280            withdrawalHash: event.args.withdrawalHash,
   281            provenAt: ts,
   282          })
   283  
   284          // Change to forgery state.
   285          this.state.forgeryDetected = true
   286          this.metrics.isDetectingForgeries.set(1)
   287  
   288          // Return early so that we never increment the highest unchecked block number and therefore
   289          // will continue to loop on this forgery indefinitely. We probably want to change this
   290          // behavior at some point so that we keep scanning for additional forgeries since the
   291          // existence of one forgery likely implies the existence of many others.
   292          return sleep(this.options.sleepTimeMs)
   293        }
   294      }
   295  
   296      // Increment the highest unchecked block number for the next loop.
   297      this.state.highestUncheckedBlockNumber = toBlockNumber
   298  
   299      // If we got through the above without throwing an error, we should be fine to reset. Only case
   300      // where this is relevant is if something is detected as a forgery accidentally and the error
   301      // doesn't happen again on the next loop.
   302      this.state.forgeryDetected = false
   303      this.metrics.isDetectingForgeries.set(0)
   304    }
   305  }
   306  
   307  if (require.main === module) {
   308    const service = new WithdrawalMonitor()
   309    service.run()
   310  }