code.vegaprotocol.io/vega@v0.79.0/core/validators/heartbeat.go (about)

     1  // Copyright (C) 2023 Gobalsky Labs Limited
     2  //
     3  // This program is free software: you can redistribute it and/or modify
     4  // it under the terms of the GNU Affero General Public License as
     5  // published by the Free Software Foundation, either version 3 of the
     6  // License, or (at your option) any later version.
     7  //
     8  // This program is distributed in the hope that it will be useful,
     9  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    10  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11  // GNU Affero General Public License for more details.
    12  //
    13  // You should have received a copy of the GNU Affero General Public License
    14  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15  
    16  package validators
    17  
    18  import (
    19  	"context"
    20  	"encoding/hex"
    21  	"errors"
    22  	"fmt"
    23  	"hash/fnv"
    24  	"sort"
    25  	"time"
    26  
    27  	"code.vegaprotocol.io/vega/core/nodewallets/eth/clef"
    28  	"code.vegaprotocol.io/vega/core/txn"
    29  	vgcontext "code.vegaprotocol.io/vega/libs/context"
    30  	"code.vegaprotocol.io/vega/logging"
    31  	commandspb "code.vegaprotocol.io/vega/protos/vega/commands/v1"
    32  
    33  	"github.com/cenkalti/backoff"
    34  	ecrypto "github.com/ethereum/go-ethereum/crypto"
    35  )
    36  
// ErrHeartbeatHasExpired is returned by ProcessValidatorHeartbeat when the
// message carried by a heartbeat is not the hash the validator is currently
// expected to sign.
var ErrHeartbeatHasExpired = errors.New("heartbeat received after expiry")
    38  
    39  // validatorHeartbeatTracker keeps track of heartbeat transactions and their results.
    40  type validatorHeartbeatTracker struct {
    41  	// the next hash expected for this validator to sign
    42  	expectedNextHash string
    43  	// the time at which we've seen the hash
    44  	expectedNexthashSince time.Time
    45  	// the index to the last 10 signatures
    46  	blockIndex int
    47  	// last 10 signatures
    48  	blockSigs [10]bool
    49  }
    50  
    51  // recordHeartbeatResult records the result of an expected signature
    52  // if true it means that the validator has signed the correct block within a reasonable time
    53  // otherwise they either didn't sign on time or didn't sign properly.
    54  func (v *validatorHeartbeatTracker) recordHeartbeatResult(status bool) {
    55  	v.blockSigs[v.blockIndex%10] = status
    56  	v.blockIndex++
    57  	v.expectedNextHash = ""
    58  }
    59  
    60  // ProcessValidatorHeartbeat is verifying the signatures from a validator's transaction and records the status.
    61  func (t *Topology) ProcessValidatorHeartbeat(ctx context.Context, vh *commandspb.ValidatorHeartbeat,
    62  	verifyVegaSig func(message, signature, pubkey []byte) error,
    63  	verifyEthSig func(message, signature []byte, hexAddress string) error,
    64  ) error {
    65  	t.mu.RLock()
    66  	defer t.mu.RUnlock()
    67  	validator, ok := t.validators[vh.NodeId]
    68  	if !ok {
    69  		return fmt.Errorf("received an heartbeat from a non-validator node: %v", vh.NodeId)
    70  	}
    71  
    72  	var (
    73  		node = t.validators[vh.NodeId]
    74  		hash = validator.heartbeatTracker.expectedNextHash
    75  	)
    76  
    77  	if hash != vh.Message {
    78  		// the heartbeat came in too late, we're already waiting for another one
    79  		return ErrHeartbeatHasExpired
    80  	}
    81  
    82  	vegas, err := hex.DecodeString(vh.GetVegaSignature().Value)
    83  	if err != nil {
    84  		validator.heartbeatTracker.recordHeartbeatResult(false)
    85  		return err
    86  	}
    87  	vegaPubKey, err := hex.DecodeString(node.data.VegaPubKey)
    88  	if err != nil {
    89  		validator.heartbeatTracker.recordHeartbeatResult(false)
    90  		return err
    91  	}
    92  	if err := verifyVegaSig([]byte(hash), vegas, vegaPubKey); err != nil {
    93  		validator.heartbeatTracker.recordHeartbeatResult(false)
    94  		return err
    95  	}
    96  
    97  	eths, err := hex.DecodeString(vh.GetEthereumSignature().Value)
    98  	if err != nil {
    99  		validator.heartbeatTracker.recordHeartbeatResult(false)
   100  		return err
   101  	}
   102  
   103  	if err := verifyEthSig([]byte(hash), eths, node.data.EthereumAddress); err != nil {
   104  		validator.heartbeatTracker.recordHeartbeatResult(false)
   105  		return err
   106  	}
   107  
   108  	// record the success
   109  	validator.heartbeatTracker.recordHeartbeatResult(true)
   110  
   111  	return nil
   112  }
   113  
   114  // checkHeartbeat checks if there's a validator who is late on their heartbeat transaction and checks if any validator needs to send a heartbeat transaction.
   115  // if so and this validator is *this* then it sends the transaction.
   116  func (t *Topology) checkHeartbeat(ctx context.Context) {
   117  	// this is called TraceID but is actually the block hash...
   118  	_, bhash := vgcontext.TraceIDFromContext(ctx)
   119  	t.checkHeartbeatWithBlockHash(ctx, bhash)
   120  }
   121  
   122  // checkAndExpireStaleHeartbeats checks if there is a validator with stale heartbeat and records the failure.
   123  func (t *Topology) checkAndExpireStaleHeartbeats() {
   124  	// if a node hasn't sent a heartbeat when they were expected, record the failure and reset their state.
   125  	now := t.timeService.GetTimeNow()
   126  	for _, v := range t.validators {
   127  		// if the time since we've expected the heartbeat is too big,
   128  		// we consider this validator invalid
   129  		// arbitrary 500 seconds duration for the validator to send a
   130  		// heartbeat, that's ~500 blocks a 1 block per sec
   131  		hbExpired := len(v.heartbeatTracker.expectedNextHash) > 0 && v.heartbeatTracker.expectedNexthashSince.Add(t.timeToSendHeartbeat).Before(now)
   132  		if hbExpired {
   133  			v.heartbeatTracker.recordHeartbeatResult(false)
   134  		}
   135  	}
   136  }
   137  
   138  func (t *Topology) getNodesRequiringHB() []string {
   139  	validatorNeedResend := []string{}
   140  	now := t.timeService.GetTimeNow()
   141  	for k, vs := range t.validators {
   142  		if len(vs.heartbeatTracker.expectedNextHash) == 0 &&
   143  			vs.heartbeatTracker.expectedNexthashSince.Add(t.timeBetweenHeartbeats).Before(now) &&
   144  			vs.data.FromEpoch <= t.epochSeq {
   145  			validatorNeedResend = append(validatorNeedResend, k)
   146  		}
   147  	}
   148  	sort.Strings(validatorNeedResend)
   149  	return validatorNeedResend
   150  }
   151  
   152  func (t *Topology) checkHeartbeatWithBlockHash(ctx context.Context, bhash string) {
   153  	t.checkAndExpireStaleHeartbeats()
   154  
   155  	// check which node
   156  	validatorNeedResend := t.getNodesRequiringHB()
   157  	if len(validatorNeedResend) == 0 {
   158  		return
   159  	}
   160  
   161  	// select deterministically which validator would send a heartbeat this round if they need to.
   162  	selectedValidator := selectValidatorForHeartbeat(bhash, validatorNeedResend)
   163  	validator := t.validators[selectedValidator]
   164  
   165  	// time for another round
   166  	validator.heartbeatTracker.expectedNextHash = bhash
   167  	validator.heartbeatTracker.expectedNexthashSince = t.timeService.GetTimeNow()
   168  
   169  	// now we figure out if we need to send a heartbeat now
   170  	if !t.isValidatorSetup || selectedValidator != t.SelfNodeID() {
   171  		// not a validator, go home
   172  		return
   173  	}
   174  
   175  	if hb := t.prepareHeartbeat(bhash); hb != nil {
   176  		t.sendHeartbeat(ctx, hb)
   177  	}
   178  }
   179  
   180  // prepareHeartbeat prepares a heartbeat transaction.
   181  func (t *Topology) prepareHeartbeat(blockHash string) *commandspb.ValidatorHeartbeat {
   182  	blockHashBytes := []byte(blockHash)
   183  	vegaSignature, err := t.wallets.GetVega().Sign(blockHashBytes)
   184  	if err != nil {
   185  		t.log.Error("could not sign heartbeat with vega wallet",
   186  			logging.String("block-hash", blockHash),
   187  			logging.Error(err),
   188  		)
   189  		return nil
   190  	}
   191  
   192  	signer := t.wallets.GetEthereum()
   193  	if signer.Algo() != clef.ClefAlgoType {
   194  		// hash our message before signing it
   195  		blockHashBytes = ecrypto.Keccak256(blockHashBytes)
   196  	}
   197  	ethereumSignature, err := signer.Sign(blockHashBytes)
   198  	if err != nil {
   199  		t.log.Error("could not sign heartbeat with ethereum wallet",
   200  			logging.String("block-hash", blockHash),
   201  			logging.Error(err),
   202  		)
   203  		return nil
   204  	}
   205  
   206  	return &commandspb.ValidatorHeartbeat{
   207  		NodeId: t.SelfNodeID(),
   208  		VegaSignature: &commandspb.Signature{
   209  			Value: hex.EncodeToString(vegaSignature),
   210  			Algo:  t.wallets.GetVega().Algo(),
   211  		},
   212  		EthereumSignature: &commandspb.Signature{
   213  			Value: hex.EncodeToString(ethereumSignature),
   214  			Algo:  signer.Algo(),
   215  		},
   216  		Message: blockHash,
   217  	}
   218  }
   219  
   220  // sendHeartbeat sends the hearbeat transaction.
   221  func (t *Topology) sendHeartbeat(ctx context.Context, hb *commandspb.ValidatorHeartbeat) {
   222  	bo := backoff.NewExponentialBackOff()
   223  	bo.MaxElapsedTime = t.timeToSendHeartbeat
   224  	bo.InitialInterval = 1 * time.Second
   225  
   226  	t.log.Debug("sending heartbeat", logging.String("nodeID", hb.NodeId))
   227  	t.cmd.CommandSync(ctx, txn.ValidatorHeartbeatCommand, hb, func(_ string, err error) {
   228  		if err != nil {
   229  			t.log.Error("couldn't send validator heartbeat", logging.Error(err))
   230  			return
   231  		}
   232  		t.log.Debug("heartbeat sent", logging.String("nodeID", hb.NodeId))
   233  	}, bo)
   234  }
   235  
   236  // selectValidatorForHeartbeat selects a validator for sending heartbeat transaction.
   237  func selectValidatorForHeartbeat(bhash string, validators []string) string {
   238  	h := fnv.New64a()
   239  	h.Write([]byte(bhash))
   240  	index := h.Sum64() % uint64(len(validators))
   241  	return validators[index]
   242  }