github.com/koko1123/flow-go-1@v0.29.6/module/epochs/qc_voter.go (about)

     1  package epochs
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/koko1123/flow-go-1/module/retrymiddleware"
    10  
    11  	"github.com/sethvargo/go-retry"
    12  
    13  	"github.com/rs/zerolog"
    14  
    15  	"github.com/koko1123/flow-go-1/consensus/hotstuff"
    16  	hotmodel "github.com/koko1123/flow-go-1/consensus/hotstuff/model"
    17  	"github.com/koko1123/flow-go-1/model/flow"
    18  	"github.com/koko1123/flow-go-1/module"
    19  	clusterstate "github.com/koko1123/flow-go-1/state/cluster"
    20  	"github.com/koko1123/flow-go-1/state/protocol"
    21  )
    22  
// Tuning parameters for the retry/backoff policy that Vote builds around the
// vote-status check and vote submission requests.
const (
	// retryDuration is the initial duration to wait between retries for all retryable
	// requests - increases exponentially for subsequent retries
	retryDuration = time.Second

	// retryMaxConsecutiveFailures is the consecutive-failure threshold handed to
	// retrymiddleware.AfterConsecutiveFailures in Vote; reaching it triggers the
	// callback that selects a fallback QC contract client.
	retryMaxConsecutiveFailures = 2

	// retryDurationMax is the maximum duration to wait between two consecutive requests
	retryDurationMax = 10 * time.Minute

	// retryJitterPercent is the percentage jitter to introduce to each retry interval
	retryJitterPercent = 25 // 25%
)
    37  
// RootQCVoter is responsible for generating and submitting votes for the
// root quorum certificate of the upcoming epoch for this node's cluster.
//
// Votes are submitted through one of several QC contract clients. The voter
// remembers the index of the client that last succeeded so that subsequent
// calls start with it; that index is protected by mu.
//
// NOTE(review): the wait field is set by NewRootQCVoter but is not read by any
// code visible in this file (Vote uses the retry* constants instead) — confirm
// whether it is still needed.
type RootQCVoter struct {
	log                       zerolog.Logger            // logger tagged with module=root_qc_voter by the constructor
	me                        module.Local              // provides this node's ID for the cluster lookup
	signer                    hotstuff.Signer           // creates the vote over the cluster root block
	state                     protocol.State            // queried for the epoch phase of the finalized state
	qcContractClients         []module.QCContractClient // priority ordered array of client to the QC aggregator smart contract
	lastSuccessfulClientIndex int                       // index of the contract client that was last successful during retries
	wait                      time.Duration             // how long to sleep in between vote attempts
	mu                        sync.Mutex                // guards lastSuccessfulClientIndex
}
    50  
    51  // NewRootQCVoter returns a new root QC voter, configured for a particular epoch.
    52  func NewRootQCVoter(
    53  	log zerolog.Logger,
    54  	me module.Local,
    55  	signer hotstuff.Signer,
    56  	state protocol.State,
    57  	contractClients []module.QCContractClient,
    58  ) *RootQCVoter {
    59  
    60  	voter := &RootQCVoter{
    61  		log:               log.With().Str("module", "root_qc_voter").Logger(),
    62  		me:                me,
    63  		signer:            signer,
    64  		state:             state,
    65  		qcContractClients: contractClients,
    66  		wait:              time.Second * 10,
    67  		mu:                sync.Mutex{},
    68  	}
    69  	return voter
    70  }
    71  
    72  // Vote handles the full procedure of generating a vote, submitting it to the
    73  // epoch smart contract, and verifying submission. Returns an error only if
    74  // there is a critical error that would make it impossible for the vote to be
    75  // submitted. Otherwise, exits when the vote has been successfully submitted.
    76  //
    77  // It is safe to run multiple times within a single setup phase.
    78  func (voter *RootQCVoter) Vote(ctx context.Context, epoch protocol.Epoch) error {
    79  
    80  	counter, err := epoch.Counter()
    81  	if err != nil {
    82  		return fmt.Errorf("could not get epoch counter: %w", err)
    83  	}
    84  	clusters, err := epoch.Clustering()
    85  	if err != nil {
    86  		return fmt.Errorf("could not get clustering: %w", err)
    87  	}
    88  	cluster, clusterIndex, ok := clusters.ByNodeID(voter.me.NodeID())
    89  	if !ok {
    90  		return fmt.Errorf("could not find self in clustering")
    91  	}
    92  
    93  	log := voter.log.With().
    94  		Uint64("epoch", counter).
    95  		Uint("cluster_index", clusterIndex).
    96  		Logger()
    97  
    98  	log.Info().Msg("preparing to generate vote for cluster root qc")
    99  
   100  	// create the canonical root block for our cluster
   101  	root := clusterstate.CanonicalRootBlock(counter, cluster)
   102  	// create a signable hotstuff model
   103  	signable := hotmodel.GenesisBlockFromFlow(root.Header)
   104  
   105  	vote, err := voter.signer.CreateVote(signable)
   106  	if err != nil {
   107  		return fmt.Errorf("could not create vote for cluster root qc: %w", err)
   108  	}
   109  
   110  	// this backoff configuration will never terminate on its own, but the
   111  	// request logic will exit when we exit the EpochSetup phase
   112  	backoff := retry.NewExponential(retryDuration)
   113  	backoff = retry.WithCappedDuration(retryDurationMax, backoff)
   114  	backoff = retry.WithJitterPercent(retryJitterPercent, backoff)
   115  
   116  	clientIndex, qcContractClient := voter.getInitialContractClient()
   117  	onMaxConsecutiveRetries := func(totalAttempts int) {
   118  		voter.updateContractClient(clientIndex)
   119  		log.Warn().Msgf("retrying on attempt (%d) with fallback access node at index (%d)", totalAttempts, clientIndex)
   120  	}
   121  	backoff = retrymiddleware.AfterConsecutiveFailures(retryMaxConsecutiveFailures, backoff, onMaxConsecutiveRetries)
   122  
   123  	err = retry.Do(ctx, backoff, func(ctx context.Context) error {
   124  		// check that we're still in the setup phase, if we're not we can't
   125  		// submit a vote anyway and must exit this process
   126  		phase, err := voter.state.Final().Phase()
   127  		if err != nil {
   128  			log.Error().Err(err).Msg("could not get current phase")
   129  		} else if phase != flow.EpochPhaseSetup {
   130  			return fmt.Errorf("could not submit vote - no longer in setup phase")
   131  		}
   132  
   133  		// check whether we've already voted, if we have we can exit early
   134  		voted, err := qcContractClient.Voted(ctx)
   135  		if err != nil {
   136  			log.Error().Err(err).Msg("could not check vote status")
   137  			return retry.RetryableError(err)
   138  		} else if voted {
   139  			log.Info().Msg("already voted - exiting QC vote process...")
   140  			// update our last successful client index for future calls
   141  			voter.updateLastSuccessfulClient(clientIndex)
   142  			return nil
   143  		}
   144  
   145  		// submit the vote - this call will block until the transaction has
   146  		// either succeeded or we are able to retry
   147  		log.Info().Msg("submitting vote...")
   148  		err = qcContractClient.SubmitVote(ctx, vote)
   149  		if err != nil {
   150  			log.Error().Err(err).Msg("could not submit vote - retrying...")
   151  			return retry.RetryableError(err)
   152  		}
   153  
   154  		log.Info().Msg("successfully submitted vote - exiting QC vote process...")
   155  
   156  		// update our last successful client index for future calls
   157  		voter.updateLastSuccessfulClient(clientIndex)
   158  		return nil
   159  	})
   160  
   161  	return err
   162  }
   163  
   164  // updateContractClient will return the last successful client index by default for all initial operations or else
   165  // it will return the appropriate client index with respect to last successful and number of client.
   166  func (voter *RootQCVoter) updateContractClient(clientIndex int) (int, module.QCContractClient) {
   167  	voter.mu.Lock()
   168  	defer voter.mu.Unlock()
   169  	if clientIndex == voter.lastSuccessfulClientIndex {
   170  		if clientIndex == len(voter.qcContractClients)-1 {
   171  			clientIndex = 0
   172  		} else {
   173  			clientIndex++
   174  		}
   175  	} else {
   176  		clientIndex = voter.lastSuccessfulClientIndex
   177  	}
   178  
   179  	return clientIndex, voter.qcContractClients[clientIndex]
   180  }
   181  
   182  // getInitialContractClient will return the last successful contract client or the initial
   183  func (voter *RootQCVoter) getInitialContractClient() (int, module.QCContractClient) {
   184  	voter.mu.Lock()
   185  	defer voter.mu.Unlock()
   186  	return voter.lastSuccessfulClientIndex, voter.qcContractClients[voter.lastSuccessfulClientIndex]
   187  }
   188  
   189  // updateLastSuccessfulClient set lastSuccessfulClientIndex in concurrency safe way
   190  func (voter *RootQCVoter) updateLastSuccessfulClient(clientIndex int) {
   191  	voter.mu.Lock()
   192  	defer voter.mu.Unlock()
   193  
   194  	voter.lastSuccessfulClientIndex = clientIndex
   195  }