github.com/klaytn/klaytn@v1.12.1/node/sc/vt_recovery.go (about)

     1  // Copyright 2019 The klaytn Authors
     2  // This file is part of the klaytn library.
     3  //
     4  // The klaytn library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The klaytn library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the klaytn library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package sc
    18  
    19  import (
    20  	"sync"
    21  	"time"
    22  
    23  	"github.com/klaytn/klaytn/accounts/abi/bind"
    24  	"github.com/pkg/errors"
    25  )
    26  
    27  var (
    28  	filterLogsStride = uint64(100)
    29  	maxPendingTxs    = 1000
    30  )
    31  
// valueTransferHint stores the last handled block number and nonce (Request or Handle).
// One hint describes a single transfer direction; it is rebuilt on every
// recovery round by updateRecoveryHintFromTo and consulted by
// checkRecoveryCondition to decide whether pending events must be resent.
type valueTransferHint struct {
	blockNumber     uint64 // block number to start searching event logs
	requestNonce    uint64 // request nonce read from the source ("from") bridge
	handleNonce     uint64 // lower handle nonce read from the target ("to") bridge
	prevHandleNonce uint64 // previous handleNonce between recovery interval
	candidate       bool   // to check recovery candidate between recovery interval
}
    40  
// valueTransferRecovery stores status information for the value transfer recovery.
type valueTransferRecovery struct {
	// stopCh is closed by Stop to make the goroutine started by Start return.
	stopCh    chan interface{}
	isRunning bool           // to check duplicated start
	wg        sync.WaitGroup // wait group to handle the Stop() sync

	// Per-direction hints, refreshed by updateRecoveryHint on every round.
	child2parentHint *valueTransferHint
	parent2childHint *valueTransferHint
	// Pending request events filled by retrievePendingEvents and reset by
	// recoverPendingEvents: childEvents are read from the child bridge,
	// parentEvents from the parent bridge.
	childEvents      []IRequestValueTransferEvent
	parentEvents     []IRequestValueTransferEvent

	// config supplies VTRecovery (enable switch) and VTRecoveryInterval
	// (ticker period in seconds) to Start.
	config      *SCConfig
	cBridgeInfo *BridgeInfo // child chain bridge
	pBridgeInfo *BridgeInfo // parent chain bridge
}
    56  
var (
	// ErrVtrDisabled is returned by Start when config.VTRecovery is false.
	ErrVtrDisabled       = errors.New("VTR is disabled")
	// ErrVtrAlreadyStarted is returned by Start when isRunning is already set.
	ErrVtrAlreadyStarted = errors.New("VTR is already started")
)
    61  
    62  func isHandledEvent(to *BridgeInfo, ev IRequestValueTransferEvent) bool {
    63  	blk, err := to.bridge.HandleNoncesToBlockNums(nil, ev.GetRequestNonce())
    64  	if err == nil && blk > 0 {
    65  		logger.Trace("skip handled event", "nonce", ev.GetRequestNonce())
    66  		return true
    67  	}
    68  	return false
    69  }
    70  
    71  // NewValueTransferRecovery creates a new value transfer recovery structure.
    72  func NewValueTransferRecovery(config *SCConfig, cBridgeInfo, pBridgeInfo *BridgeInfo) *valueTransferRecovery {
    73  	return &valueTransferRecovery{
    74  		stopCh:           make(chan interface{}),
    75  		isRunning:        false,
    76  		wg:               sync.WaitGroup{},
    77  		child2parentHint: &valueTransferHint{},
    78  		parent2childHint: &valueTransferHint{},
    79  		childEvents:      []IRequestValueTransferEvent{},
    80  		parentEvents:     []IRequestValueTransferEvent{},
    81  		config:           config,
    82  		cBridgeInfo:      cBridgeInfo,
    83  		pBridgeInfo:      pBridgeInfo,
    84  	}
    85  }
    86  
    87  // Start implements starting all internal goroutines used by the value transfer recovery.
    88  func (vtr *valueTransferRecovery) Start() error {
    89  	if !vtr.config.VTRecovery {
    90  		return ErrVtrDisabled
    91  	}
    92  
    93  	// TODO-Klaytn-Servicechain If there is no user API to start recovery, remove isRunning in Start/Stop.
    94  	if vtr.isRunning {
    95  		return ErrVtrAlreadyStarted
    96  	}
    97  
    98  	vtr.wg.Add(1)
    99  
   100  	go func() {
   101  		ticker := time.NewTicker(time.Duration(vtr.config.VTRecoveryInterval) * time.Second)
   102  		defer func() {
   103  			ticker.Stop()
   104  			vtr.wg.Done()
   105  		}()
   106  
   107  		if err := vtr.Recover(); err != nil {
   108  			logger.Warn("initial value transfer recovery is failed", "err", err)
   109  		}
   110  
   111  		vtr.isRunning = true
   112  
   113  		for {
   114  			select {
   115  			case <-vtr.stopCh:
   116  				logger.Info("value transfer recovery is stopped")
   117  				return
   118  			case <-ticker.C:
   119  				if vtr.isRunning {
   120  					if err := vtr.Recover(); err != nil {
   121  						logger.Trace("value transfer recovery is failed", "err", err)
   122  					}
   123  				}
   124  			}
   125  		}
   126  	}()
   127  
   128  	return nil
   129  }
   130  
   131  // Stop implements terminating all internal goroutines used by the value transfer recovery.
   132  func (vtr *valueTransferRecovery) Stop() error {
   133  	if !vtr.isRunning {
   134  		logger.Info("value transfer recovery is already stopped")
   135  		return nil
   136  	}
   137  	close(vtr.stopCh)
   138  	vtr.wg.Wait()
   139  	vtr.isRunning = false
   140  	return nil
   141  }
   142  
   143  // Recover implements the whole recovery process of the value transfer recovery.
   144  func (vtr *valueTransferRecovery) Recover() error {
   145  	logger.Trace("update value transfer hint")
   146  	err := vtr.updateRecoveryHint()
   147  	if err != nil {
   148  		return err
   149  	}
   150  
   151  	logger.Trace("retrieve pending events")
   152  	err = vtr.retrievePendingEvents()
   153  	if err != nil {
   154  		return err
   155  	}
   156  
   157  	logger.Trace("recover pending events")
   158  	err = vtr.recoverPendingEvents()
   159  	if err != nil {
   160  		return err
   161  	}
   162  
   163  	return nil
   164  }
   165  
   166  // updateRecoveryHint updates hints for value transfers on the both side.
   167  // One is from child chain to parent chain, the other is from parent chain to child chain value transfers.
   168  // The hint includes a block number to begin search, request nonce and handle nonce.
   169  func (vtr *valueTransferRecovery) updateRecoveryHint() error {
   170  	if vtr.cBridgeInfo == nil {
   171  		return errors.New("child chain bridge is nil")
   172  	}
   173  	if vtr.pBridgeInfo == nil {
   174  		return errors.New("parent chain bridge is nil")
   175  	}
   176  
   177  	var err error
   178  	vtr.child2parentHint, err = updateRecoveryHintFromTo(vtr.child2parentHint, vtr.cBridgeInfo, vtr.pBridgeInfo)
   179  	if err != nil {
   180  		return err
   181  	}
   182  
   183  	vtr.parent2childHint, err = updateRecoveryHintFromTo(vtr.parent2childHint, vtr.pBridgeInfo, vtr.cBridgeInfo)
   184  	if err != nil {
   185  		return err
   186  	}
   187  
   188  	// Update the hint for the initial status.
   189  	if !vtr.isRunning {
   190  		vtr.child2parentHint.prevHandleNonce = vtr.child2parentHint.handleNonce
   191  		vtr.parent2childHint.prevHandleNonce = vtr.parent2childHint.handleNonce
   192  		vtr.child2parentHint.candidate = true
   193  		vtr.parent2childHint.candidate = true
   194  	}
   195  
   196  	return nil
   197  }
   198  
   199  // updateRecoveryHint updates a hint for the one-way value transfers.
   200  func updateRecoveryHintFromTo(prevHint *valueTransferHint, from, to *BridgeInfo) (*valueTransferHint, error) {
   201  	var err error
   202  	var hint valueTransferHint
   203  
   204  	logger.Trace("updateRecoveryHintFromTo start")
   205  	if prevHint != nil {
   206  		logger.Trace("recovery prevHint", "rnonce", prevHint.requestNonce, "hnonce", prevHint.handleNonce, "phnonce", prevHint.prevHandleNonce, "cand", prevHint.candidate)
   207  	}
   208  
   209  	hint.blockNumber, err = to.bridge.RecoveryBlockNumber(nil)
   210  	if err != nil {
   211  		return nil, err
   212  	}
   213  
   214  	requestNonce, err := from.bridge.RequestNonce(nil)
   215  	if err != nil {
   216  		return nil, err
   217  	}
   218  	from.SetRequestNonce(requestNonce)
   219  	to.SetRequestNonceFromCounterpart(requestNonce)
   220  	hint.requestNonce = requestNonce
   221  
   222  	handleNonce, err := to.bridge.LowerHandleNonce(nil)
   223  	if err != nil {
   224  		return nil, err
   225  	}
   226  	to.UpdateLowerHandleNonce(handleNonce)
   227  
   228  	if prevHint != nil {
   229  		hint.prevHandleNonce = prevHint.handleNonce
   230  		hint.candidate = prevHint.candidate
   231  	}
   232  	hint.handleNonce = handleNonce
   233  
   234  	logger.Trace("updateRecoveryHintFromTo finish", "rnonce", hint.requestNonce, "hnonce", hint.handleNonce, "phnonce", hint.prevHandleNonce, "cand", hint.candidate)
   235  
   236  	return &hint, nil
   237  }
   238  
   239  // retrievePendingEvents retrieves pending events on the child chain or parent chain.
   240  // The pending event is the value transfer without processing HandleValueTransfer.
   241  func (vtr *valueTransferRecovery) retrievePendingEvents() error {
   242  	if vtr.cBridgeInfo == nil {
   243  		return errors.New("child chain bridge is nil")
   244  	}
   245  	if vtr.pBridgeInfo == nil {
   246  		return errors.New("parent chain bridge is nil")
   247  	}
   248  
   249  	var err error
   250  	vtr.childEvents, err = retrievePendingEventsFrom(vtr.child2parentHint, vtr.cBridgeInfo, vtr.pBridgeInfo)
   251  	if err != nil {
   252  		return err
   253  	}
   254  	vtr.parentEvents, err = retrievePendingEventsFrom(vtr.parent2childHint, vtr.pBridgeInfo, vtr.cBridgeInfo)
   255  	if err != nil {
   256  		return err
   257  	}
   258  
   259  	return nil
   260  }
   261  
   262  // retrievePendingEventsFrom retrieves pending events from the specified bridge by using the hint provided.
   263  // The filter uses a hint as a search range. It returns a slice of events that has log details.
   264  func retrievePendingEventsFrom(hint *valueTransferHint, from, to *BridgeInfo) ([]IRequestValueTransferEvent, error) {
   265  	if from.bridge == nil {
   266  		return nil, errors.New("from bridge is nil")
   267  	}
   268  	if to.bridge == nil {
   269  		return nil, errors.New("to bridge is nil")
   270  	}
   271  	if hint.requestNonce == hint.handleNonce {
   272  		return nil, nil
   273  	}
   274  	if !checkRecoveryCondition(hint) {
   275  		return nil, nil
   276  	}
   277  
   278  	var pendingEvents []IRequestValueTransferEvent
   279  
   280  	curBlkNum, err := from.GetCurrentBlockNumber()
   281  	if err != nil {
   282  		return nil, err
   283  	}
   284  
   285  	startBlkNum := hint.blockNumber
   286  	endBlkNum := startBlkNum + filterLogsStride
   287  
   288  pendingTxLoop:
   289  	for startBlkNum <= curBlkNum {
   290  		if endBlkNum > curBlkNum {
   291  			endBlkNum = curBlkNum
   292  		}
   293  		opts := &bind.FilterOpts{Start: startBlkNum, End: &endBlkNum}
   294  		reqVTevIt, err := from.bridge.FilterRequestValueTransfer(opts, nil, nil, nil)
   295  		if err != nil {
   296  			return nil, err
   297  		}
   298  		reqVTencodedDataIt, err := from.bridge.FilterRequestValueTransferEncoded(opts, nil, nil, nil)
   299  		if err != nil {
   300  			return nil, err
   301  		}
   302  
   303  		// TODO: The same logic with two types are verbose and can be neat with Go1.18 which supports generic.
   304  		// For the FilterRequestValueTransfer type
   305  		for reqVTevIt.Next() {
   306  			logger.Trace("pending nonce in the RequestValueTransfer event", "requestNonce", reqVTevIt.Event.RequestNonce)
   307  			if reqVTevIt.Event.RequestNonce >= hint.handleNonce {
   308  				// Check if the event is already handled in target bridge contract
   309  				if isHandledEvent(to, RequestValueTransferEvent{reqVTevIt.Event}) {
   310  					continue
   311  				}
   312  				logger.Trace("filtered pending nonce", "requestNonce", reqVTevIt.Event.RequestNonce, "handledNonce", hint.handleNonce)
   313  				pendingEvents = append(pendingEvents, RequestValueTransferEvent{reqVTevIt.Event})
   314  				if len(pendingEvents) >= maxPendingTxs {
   315  					reqVTevIt.Close()
   316  					break pendingTxLoop
   317  				}
   318  			}
   319  		}
   320  		// For the FilterRequestValueTransferEncoded type
   321  		for reqVTencodedDataIt.Next() {
   322  			logger.Trace("pending nonce in the RequestValueTransferEncoded event", "requestNonce", reqVTencodedDataIt.Event.RequestNonce)
   323  			if reqVTencodedDataIt.Event.RequestNonce >= hint.handleNonce {
   324  				// Check if the event is already handled in target bridge contract
   325  				if isHandledEvent(to, RequestValueTransferEncodedEvent{reqVTencodedDataIt.Event}) {
   326  					continue
   327  				}
   328  				logger.Trace("filtered pending nonce", "requestNonce", reqVTencodedDataIt.Event.RequestNonce, "handledNonce", hint.handleNonce)
   329  				pendingEvents = append(pendingEvents, RequestValueTransferEncodedEvent{reqVTencodedDataIt.Event})
   330  				if len(pendingEvents) >= maxPendingTxs {
   331  					reqVTencodedDataIt.Close()
   332  					break pendingTxLoop
   333  				}
   334  			}
   335  		}
   336  		startBlkNum = endBlkNum + 1
   337  		endBlkNum = startBlkNum + filterLogsStride
   338  		reqVTevIt.Close()
   339  		reqVTencodedDataIt.Close()
   340  	}
   341  
   342  	if len(pendingEvents) > 0 {
   343  		logger.Info("retrieved pending events", "bridge", from.address.String(),
   344  			"len(pendingEvents)", len(pendingEvents), "1st nonce", pendingEvents[0].Nonce())
   345  	}
   346  	return pendingEvents, nil
   347  }
   348  
   349  // checkRecoveryCandidateCondition checks if vtr is recovery candidate or not.
   350  // candidate is introduced to check any normal request just before checking start.
   351  //
   352  // For example,
   353  //
   354  // ======== ======== ======== ========
   355  // Round    R Nonce  H Nonce  Result
   356  // ======== ======== ======== ========
   357  // 1        10       10       false
   358  // <burst requests just before checking>
   359  // 2        1000     10       ? (it can be normal but candidate)
   360  // 3        2000     10       true
   361  func checkRecoveryCandidateCondition(hint *valueTransferHint) bool {
   362  	return hint.requestNonce != hint.handleNonce && hint.prevHandleNonce == hint.handleNonce
   363  }
   364  
   365  // checkRecoveryCondition checks if recovery for the handle value transfers is needed or not.
   366  func checkRecoveryCondition(hint *valueTransferHint) bool {
   367  	if checkRecoveryCandidateCondition(hint) && hint.candidate {
   368  		hint.candidate = false
   369  		return true
   370  	}
   371  	if checkRecoveryCandidateCondition(hint) && !hint.candidate {
   372  		hint.candidate = true
   373  		return false
   374  	}
   375  	hint.candidate = false
   376  	return false
   377  }
   378  
   379  // recoverPendingEvents recovers all pending events by resending them.
   380  func (vtr *valueTransferRecovery) recoverPendingEvents() error {
   381  	defer func() {
   382  		vtr.childEvents = []IRequestValueTransferEvent{}
   383  		vtr.parentEvents = []IRequestValueTransferEvent{}
   384  	}()
   385  
   386  	if len(vtr.childEvents) > 0 {
   387  		logger.Warn("VT Recovery : Child -> Parent Chain", "cBridge", vtr.cBridgeInfo.address.String(), "events", len(vtr.childEvents))
   388  	}
   389  
   390  	vtRequestEventMeter.Mark(int64(len(vtr.childEvents)))
   391  	vtRecoveredRequestEventMeter.Mark(int64(len(vtr.childEvents)))
   392  
   393  	events := make([]IRequestValueTransferEvent, len(vtr.childEvents))
   394  	for i, event := range vtr.childEvents {
   395  		events[i] = event
   396  	}
   397  	vtr.pBridgeInfo.AddRequestValueTransferEvents(events)
   398  
   399  	if len(vtr.parentEvents) > 0 {
   400  		logger.Warn("VT Recovery : Parent -> Child Chain", "pBridge", vtr.pBridgeInfo.address.String(), "events", len(vtr.parentEvents))
   401  	}
   402  
   403  	vtHandleEventMeter.Mark(int64(len(vtr.parentEvents)))
   404  	events = make([]IRequestValueTransferEvent, len(vtr.parentEvents))
   405  	for i, event := range vtr.parentEvents {
   406  		events[i] = event
   407  	}
   408  	vtr.cBridgeInfo.AddRequestValueTransferEvents(events)
   409  
   410  	return nil
   411  }
   412  
   413  func (vtr *valueTransferRecovery) WaitRunningStatus(expected bool, timeout time.Duration) error {
   414  	for i := 0; i < int(timeout/time.Second); i++ {
   415  		if vtr.isRunning == expected {
   416  			return nil
   417  		}
   418  		time.Sleep(1 * time.Second)
   419  	}
   420  
   421  	return errors.New("timeout to wait expect value")
   422  }