code.vegaprotocol.io/vega@v0.79.0/core/validators/heartbeat.go (about) 1 // Copyright (C) 2023 Gobalsky Labs Limited 2 // 3 // This program is free software: you can redistribute it and/or modify 4 // it under the terms of the GNU Affero General Public License as 5 // published by the Free Software Foundation, either version 3 of the 6 // License, or (at your option) any later version. 7 // 8 // This program is distributed in the hope that it will be useful, 9 // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 // GNU Affero General Public License for more details. 12 // 13 // You should have received a copy of the GNU Affero General Public License 14 // along with this program. If not, see <http://www.gnu.org/licenses/>. 15 16 package validators 17 18 import ( 19 "context" 20 "encoding/hex" 21 "errors" 22 "fmt" 23 "hash/fnv" 24 "sort" 25 "time" 26 27 "code.vegaprotocol.io/vega/core/nodewallets/eth/clef" 28 "code.vegaprotocol.io/vega/core/txn" 29 vgcontext "code.vegaprotocol.io/vega/libs/context" 30 "code.vegaprotocol.io/vega/logging" 31 commandspb "code.vegaprotocol.io/vega/protos/vega/commands/v1" 32 33 "github.com/cenkalti/backoff" 34 ecrypto "github.com/ethereum/go-ethereum/crypto" 35 ) 36 37 var ErrHeartbeatHasExpired = errors.New("heartbeat received after expiry") 38 39 // validatorHeartbeatTracker keeps track of heartbeat transactions and their results. 40 type validatorHeartbeatTracker struct { 41 // the next hash expected for this validator to sign 42 expectedNextHash string 43 // the time at which we've seen the hash 44 expectedNexthashSince time.Time 45 // the index to the last 10 signatures 46 blockIndex int 47 // last 10 signatures 48 blockSigs [10]bool 49 } 50 51 // recordHeartbeatResult records the result of an expected signature 52 // if true it means that the validator has signed the correct block within a reasonable time 53 // otherwise they either didn't sign on time or didn't sign properly. 54 func (v *validatorHeartbeatTracker) recordHeartbeatResult(status bool) { 55 v.blockSigs[v.blockIndex%10] = status 56 v.blockIndex++ 57 v.expectedNextHash = "" 58 } 59 60 // ProcessValidatorHeartbeat is verifying the signatures from a validator's transaction and records the status. 61 func (t *Topology) ProcessValidatorHeartbeat(ctx context.Context, vh *commandspb.ValidatorHeartbeat, 62 verifyVegaSig func(message, signature, pubkey []byte) error, 63 verifyEthSig func(message, signature []byte, hexAddress string) error, 64 ) error { 65 t.mu.RLock() 66 defer t.mu.RUnlock() 67 validator, ok := t.validators[vh.NodeId] 68 if !ok { 69 return fmt.Errorf("received an heartbeat from a non-validator node: %v", vh.NodeId) 70 } 71 72 var ( 73 node = t.validators[vh.NodeId] 74 hash = validator.heartbeatTracker.expectedNextHash 75 ) 76 77 if hash != vh.Message { 78 // the heartbeat came in too late, we're already waiting for another one 79 return ErrHeartbeatHasExpired 80 } 81 82 vegas, err := hex.DecodeString(vh.GetVegaSignature().Value) 83 if err != nil { 84 validator.heartbeatTracker.recordHeartbeatResult(false) 85 return err 86 } 87 vegaPubKey, err := hex.DecodeString(node.data.VegaPubKey) 88 if err != nil { 89 validator.heartbeatTracker.recordHeartbeatResult(false) 90 return err 91 } 92 if err := verifyVegaSig([]byte(hash), vegas, vegaPubKey); err != nil { 93 validator.heartbeatTracker.recordHeartbeatResult(false) 94 return err 95 } 96 97 eths, err := hex.DecodeString(vh.GetEthereumSignature().Value) 98 if err != nil { 99 validator.heartbeatTracker.recordHeartbeatResult(false) 100 return err 101 } 102 103 if err := verifyEthSig([]byte(hash), eths, node.data.EthereumAddress); err != nil { 104 validator.heartbeatTracker.recordHeartbeatResult(false) 105 return err 106 } 107 108 // record the success 109 validator.heartbeatTracker.recordHeartbeatResult(true) 110 111 return nil 112 } 113 114 // checkHeartbeat checks if there's a validator who is late on their heartbeat transaction and checks if any validator needs to send a heartbeat transaction. 115 // if so and this validator is *this* then it sends the transaction. 116 func (t *Topology) checkHeartbeat(ctx context.Context) { 117 // this is called TraceID but is actually the block hash... 118 _, bhash := vgcontext.TraceIDFromContext(ctx) 119 t.checkHeartbeatWithBlockHash(ctx, bhash) 120 } 121 122 // checkAndExpireStaleHeartbeats checks if there is a validator with stale heartbeat and records the failure. 123 func (t *Topology) checkAndExpireStaleHeartbeats() { 124 // if a node hasn't sent a heartbeat when they were expected, record the failure and reset their state. 125 now := t.timeService.GetTimeNow() 126 for _, v := range t.validators { 127 // if the time since we've expected the heartbeat is too big, 128 // we consider this validator invalid 129 // arbitrary 500 seconds duration for the validator to send a 130 // heartbeat, that's ~500 blocks a 1 block per sec 131 hbExpired := len(v.heartbeatTracker.expectedNextHash) > 0 && v.heartbeatTracker.expectedNexthashSince.Add(t.timeToSendHeartbeat).Before(now) 132 if hbExpired { 133 v.heartbeatTracker.recordHeartbeatResult(false) 134 } 135 } 136 } 137 138 func (t *Topology) getNodesRequiringHB() []string { 139 validatorNeedResend := []string{} 140 now := t.timeService.GetTimeNow() 141 for k, vs := range t.validators { 142 if len(vs.heartbeatTracker.expectedNextHash) == 0 && 143 vs.heartbeatTracker.expectedNexthashSince.Add(t.timeBetweenHeartbeats).Before(now) && 144 vs.data.FromEpoch <= t.epochSeq { 145 validatorNeedResend = append(validatorNeedResend, k) 146 } 147 } 148 sort.Strings(validatorNeedResend) 149 return validatorNeedResend 150 } 151 152 func (t *Topology) checkHeartbeatWithBlockHash(ctx context.Context, bhash string) { 153 t.checkAndExpireStaleHeartbeats() 154 155 // check which node 156 validatorNeedResend := t.getNodesRequiringHB() 157 if len(validatorNeedResend) == 0 { 158 return 159 } 160 161 // select deterministically which validator would send a heartbeat this round if they need to. 162 selectedValidator := selectValidatorForHeartbeat(bhash, validatorNeedResend) 163 validator := t.validators[selectedValidator] 164 165 // time for another round 166 validator.heartbeatTracker.expectedNextHash = bhash 167 validator.heartbeatTracker.expectedNexthashSince = t.timeService.GetTimeNow() 168 169 // now we figure out if we need to send a heartbeat now 170 if !t.isValidatorSetup || selectedValidator != t.SelfNodeID() { 171 // not a validator, go home 172 return 173 } 174 175 if hb := t.prepareHeartbeat(bhash); hb != nil { 176 t.sendHeartbeat(ctx, hb) 177 } 178 } 179 180 // prepareHeartbeat prepares a heartbeat transaction. 181 func (t *Topology) prepareHeartbeat(blockHash string) *commandspb.ValidatorHeartbeat { 182 blockHashBytes := []byte(blockHash) 183 vegaSignature, err := t.wallets.GetVega().Sign(blockHashBytes) 184 if err != nil { 185 t.log.Error("could not sign heartbeat with vega wallet", 186 logging.String("block-hash", blockHash), 187 logging.Error(err), 188 ) 189 return nil 190 } 191 192 signer := t.wallets.GetEthereum() 193 if signer.Algo() != clef.ClefAlgoType { 194 // hash our message before signing it 195 blockHashBytes = ecrypto.Keccak256(blockHashBytes) 196 } 197 ethereumSignature, err := signer.Sign(blockHashBytes) 198 if err != nil { 199 t.log.Error("could not sign heartbeat with ethereum wallet", 200 logging.String("block-hash", blockHash), 201 logging.Error(err), 202 ) 203 return nil 204 } 205 206 return &commandspb.ValidatorHeartbeat{ 207 NodeId: t.SelfNodeID(), 208 VegaSignature: &commandspb.Signature{ 209 Value: hex.EncodeToString(vegaSignature), 210 Algo: t.wallets.GetVega().Algo(), 211 }, 212 EthereumSignature: &commandspb.Signature{ 213 Value: hex.EncodeToString(ethereumSignature), 214 Algo: signer.Algo(), 215 }, 216 Message: blockHash, 217 } 218 } 219 220 // sendHeartbeat sends the hearbeat transaction. 221 func (t *Topology) sendHeartbeat(ctx context.Context, hb *commandspb.ValidatorHeartbeat) { 222 bo := backoff.NewExponentialBackOff() 223 bo.MaxElapsedTime = t.timeToSendHeartbeat 224 bo.InitialInterval = 1 * time.Second 225 226 t.log.Debug("sending heartbeat", logging.String("nodeID", hb.NodeId)) 227 t.cmd.CommandSync(ctx, txn.ValidatorHeartbeatCommand, hb, func(_ string, err error) { 228 if err != nil { 229 t.log.Error("couldn't send validator heartbeat", logging.Error(err)) 230 return 231 } 232 t.log.Debug("heartbeat sent", logging.String("nodeID", hb.NodeId)) 233 }, bo) 234 } 235 236 // selectValidatorForHeartbeat selects a validator for sending heartbeat transaction. 237 func selectValidatorForHeartbeat(bhash string, validators []string) string { 238 h := fnv.New64a() 239 h.Write([]byte(bhash)) 240 index := h.Sum64() % uint64(len(validators)) 241 return validators[index] 242 }