code.vegaprotocol.io/vega@v0.79.0/core/statevar/state_variable.go (about) 1 // Copyright (C) 2023 Gobalsky Labs Limited 2 // 3 // This program is free software: you can redistribute it and/or modify 4 // it under the terms of the GNU Affero General Public License as 5 // published by the Free Software Foundation, either version 3 of the 6 // License, or (at your option) any later version. 7 // 8 // This program is distributed in the hope that it will be useful, 9 // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 // GNU Affero General Public License for more details. 12 // 13 // You should have received a copy of the GNU Affero General Public License 14 // along with this program. If not, see <http://www.gnu.org/licenses/>. 15 16 package statevar 17 18 import ( 19 "context" 20 "errors" 21 "math/rand" 22 "sort" 23 "sync" 24 "time" 25 26 "code.vegaprotocol.io/vega/core/events" 27 "code.vegaprotocol.io/vega/core/txn" 28 "code.vegaprotocol.io/vega/core/types/statevar" 29 "code.vegaprotocol.io/vega/libs/num" 30 "code.vegaprotocol.io/vega/logging" 31 vegapb "code.vegaprotocol.io/vega/protos/vega" 32 commandspb "code.vegaprotocol.io/vega/protos/vega/commands/v1" 33 ) 34 35 // ConsensusState tracks the state transitions of a state variable. 
36 type ConsensusState int 37 38 const ( 39 ConsensusStateUnspecified ConsensusState = iota 40 ConsensusStateCalculationStarted 41 ConsensusStatePerfectMatch 42 ConsensusStateSeekingConsensus 43 ConsensusStateconsensusReachedLocked 44 ConsensusStateCalculationAborted 45 ConsensusStateError 46 ConsensusStateStale 47 ) 48 49 var stateToName = map[ConsensusState]string{ 50 ConsensusStateUnspecified: "undefined", 51 ConsensusStateCalculationStarted: "consensus_calc_started", 52 ConsensusStatePerfectMatch: "perfect_match", 53 ConsensusStateSeekingConsensus: "seeking_consensus", 54 ConsensusStateconsensusReachedLocked: "consensus_reached", 55 ConsensusStateCalculationAborted: "consensus_calc_aborted", 56 ConsensusStateError: "error", 57 } 58 59 type StateVariable struct { 60 log *logging.Logger 61 top Topology 62 cmd Commander 63 broker Broker 64 ID string // the unique identifier of the state variable 65 asset string // the asset of the state variable - used for filtering relevant events 66 market string // the market of the state variable - used for filtering relevant events 67 converter statevar.Converter // convert to/from the key/value bundle model into typed result model 68 startCalculation func(string, statevar.FinaliseCalculation) // a callback to the owner to start the calculation of the value of the state variable 69 result func(context.Context, statevar.StateVariableResult) error // a callback to be called when the value reaches consensus 70 71 state ConsensusState // the current status of consensus 72 eventID string // the event ID triggering the calculation 73 validatorResults map[string]*statevar.KeyValueBundle // the result of the calculation as received from validators 74 roundsSinceMeaningfulUpdate uint 75 pendingEvents []pendingEvent 76 lock sync.Mutex 77 78 currentTime time.Time 79 80 // use retries to workaround transactions go missing in tendermint 81 lastSentSelfBundle *commandspb.StateVariableProposal 82 lastSentSelfBundleTime time.Time 83 } 84 85 
func NewStateVar( 86 log *logging.Logger, 87 broker Broker, 88 top Topology, 89 cmd Commander, 90 currentTime time.Time, 91 ID, asset, 92 market string, 93 converter statevar.Converter, 94 startCalculation func(string, statevar.FinaliseCalculation), 95 trigger []statevar.EventType, 96 result func(context.Context, statevar.StateVariableResult) error, 97 ) *StateVariable { 98 sv := &StateVariable{ 99 log: log, 100 broker: broker, 101 top: top, 102 cmd: cmd, 103 ID: ID, 104 asset: asset, 105 market: market, 106 converter: converter, 107 startCalculation: startCalculation, 108 result: result, 109 state: ConsensusStateUnspecified, 110 validatorResults: map[string]*statevar.KeyValueBundle{}, 111 roundsSinceMeaningfulUpdate: 0, 112 } 113 return sv 114 } 115 116 // GetAsset returns the asset of the state variable. 117 func (sv *StateVariable) GetAsset() string { 118 return sv.asset 119 } 120 121 // GetMarket returns the market of the state variable. 122 func (sv *StateVariable) GetMarket() string { 123 return sv.market 124 } 125 126 // endBlock is called at the end of the block to flush the event. This is snapshot-friendly so that at the end of the block we clear all events as opposed to doing the same at the beginning of the block. 
127 func (sv *StateVariable) endBlock(ctx context.Context) { 128 sv.lock.Lock() 129 evts := make([]events.Event, 0, len(sv.pendingEvents)) 130 for _, pending := range sv.pendingEvents { 131 newEvt := events.NewStateVarEvent(ctx, sv.ID, pending.eventID, pending.state) 132 evts = append(evts, newEvt) 133 protoEvt := newEvt.Proto() 134 if sv.log.IsDebug() { 135 sv.log.Debug("state-var event sent", logging.String("event", protoEvt.String())) 136 } 137 } 138 sv.pendingEvents = []pendingEvent{} 139 sv.lock.Unlock() 140 sv.broker.SendBatch(evts) 141 } 142 143 func (sv *StateVariable) startBlock(t time.Time) { 144 sv.lock.Lock() 145 sv.currentTime = t 146 147 // if we have an active event, and we sent the bundle and we're 5 seconds after sending the bundle and haven't received our self bundle 148 // that means the transaction may have gone missing, let's retry sending it. 149 needsResend := false 150 if sv.eventID != "" && sv.lastSentSelfBundle != nil && t.After(sv.lastSentSelfBundleTime.Add(5*time.Second)) { 151 sv.lastSentSelfBundleTime = t 152 needsResend = true 153 } 154 sv.lock.Unlock() 155 if needsResend { 156 sv.logAndRetry(errors.New("consensus not reached - timeout expired"), sv.lastSentSelfBundle) 157 } 158 } 159 160 // calculation is required for the state variable for the given event id. 
161 func (sv *StateVariable) eventTriggered(eventID string) { 162 sv.lock.Lock() 163 164 if sv.log.IsDebug() { 165 sv.log.Debug("event triggered", logging.String("state-var", sv.ID), logging.String("event-id", eventID)) 166 } 167 // if we get a new event while processing an existing event we abort the current calculation and start a new one 168 if sv.eventID != "" { 169 if sv.log.GetLevel() <= logging.DebugLevel { 170 sv.log.Debug("aborting state variable event", logging.String("state-var", sv.ID), logging.String("aborted-event-id", sv.eventID), logging.String("new-event-id", sv.eventID)) 171 } 172 173 // reset the last bundle so we don't send it by mistake 174 sv.lastSentSelfBundle = nil 175 176 // if we got a new event and were not in consensus, increase the number of rounds with no consensus and if 177 // we've not had a meaningful update - send an event with stale state 178 if sv.state == ConsensusStateSeekingConsensus { 179 sv.roundsSinceMeaningfulUpdate++ 180 if sv.roundsSinceMeaningfulUpdate >= 3 { 181 sv.state = ConsensusStateStale 182 sv.addEventLocked() 183 } 184 } 185 186 sv.state = ConsensusStateCalculationAborted 187 sv.addEventLocked() 188 } 189 190 // reset any existing state 191 sv.eventID = eventID 192 sv.validatorResults = map[string]*statevar.KeyValueBundle{} 193 sv.state = ConsensusStateCalculationStarted 194 sv.addEventLocked() 195 196 sv.lock.Unlock() 197 198 // kickoff calculation 199 sv.startCalculation(sv.eventID, sv) 200 } 201 202 // CalculationFinished is called from the owner when the calculation is completed to kick off consensus. 
203 func (sv *StateVariable) CalculationFinished(eventID string, result statevar.StateVariableResult, err error) { 204 sv.lock.Lock() 205 if sv.eventID != eventID { 206 sv.log.Warn("ignoring recevied the result of a calculation of an old eventID", logging.String("state-var", sv.ID), logging.String("event-id", eventID)) 207 } 208 if err != nil { 209 sv.log.Error("could not calculate state for", logging.String("id", sv.ID), logging.String("event-id", eventID)) 210 sv.state = ConsensusStateError 211 sv.addEventLocked() 212 sv.eventID = "" 213 sv.lock.Unlock() 214 return 215 } 216 217 if !sv.top.IsValidator() { 218 // if we're a non-validator we still need to do the calculation so that the snapshot will be in sync with 219 // a validators, but now we're here we do not need to actually send in our results. 220 sv.lock.Unlock() 221 return 222 } 223 224 // save our result and send the result to vega to be updated by other nodes. 225 kvb := sv.converter.InterfaceToBundle(result).ToProto() 226 227 // this is a test feature that adds noise up to the tolerance to the state variable 228 // it should be excluded by build tag for production 229 kvb = sv.AddNoise(kvb) 230 231 svp := &commandspb.StateVariableProposal{ 232 Proposal: &vegapb.StateValueProposal{ 233 StateVarId: sv.ID, 234 EventId: sv.eventID, 235 Kvb: kvb, 236 }, 237 } 238 239 // set the bundle and the time 240 sv.lastSentSelfBundle = svp 241 sv.lastSentSelfBundleTime = sv.currentTime 242 243 // need to release the lock before we send the transaction command 244 sv.lock.Unlock() 245 sv.cmd.Command(context.Background(), txn.StateVariableProposalCommand, svp, func(_ string, err error) { sv.logAndRetry(err, svp) }, nil) 246 if sv.log.GetLevel() <= logging.DebugLevel { 247 sv.log.Debug("result calculated and sent to vega", logging.String("validator", sv.top.SelfNodeID()), logging.String("state-var", sv.ID), logging.String("event-id", eventID)) 248 } 249 } 250 251 // logAndRetry logs errors from tendermint transaction 
submission failure and retries if we're still handling the same event. 252 func (sv *StateVariable) logAndRetry(err error, svp *commandspb.StateVariableProposal) { 253 if err == nil { 254 return 255 } 256 sv.lock.Lock() 257 sv.log.Error("failed to send state variable proposal command", logging.String("id", sv.ID), logging.String("event-id", sv.eventID), logging.Error(err)) 258 if svp.Proposal.EventId == sv.eventID { 259 sv.lock.Unlock() 260 if sv.log.IsDebug() { 261 sv.log.Debug("retrying to send state variable proposal command", logging.String("id", sv.ID), logging.String("event-id", sv.eventID)) 262 } 263 sv.cmd.Command(context.Background(), txn.StateVariableProposalCommand, svp, func(_ string, err error) { sv.logAndRetry(err, svp) }, nil) 264 return 265 } 266 sv.lock.Unlock() 267 } 268 269 // bundleReceived is called when we get a result from another validator corresponding to a given event ID. 270 func (sv *StateVariable) bundleReceived(ctx context.Context, node, eventID string, bundle *statevar.KeyValueBundle, rng *rand.Rand, validatorVotesRequired num.Decimal) { 271 sv.lock.Lock() 272 defer sv.lock.Unlock() 273 274 // if the bundle is received for a stale or wrong event, ignore it 275 if sv.eventID != eventID { 276 sv.log.Debug("received a result for a stale event", logging.String("ID", sv.ID), logging.String("from-node", node), logging.String("current-even-id", sv.eventID), logging.String("receivedEventID", eventID)) 277 return 278 } 279 280 // if for some reason we received a result from a non validator node, ignore it 281 if !sv.top.IsValidatorVegaPubKey(node) { 282 sv.log.Debug("state var bundle received from a non validator node - ignoring", logging.String("from-validator", node), logging.String("state-var", sv.ID), logging.String("eventID", eventID)) 283 return 284 } 285 286 if sv.top.SelfNodeID() == node { 287 sv.lastSentSelfBundle = nil 288 sv.lastSentSelfBundleTime = time.Time{} 289 sv.log.Debug("state var bundle received self vote", 
logging.String("from-validator", node), logging.String("state-var", sv.ID), logging.String("eventID", eventID)) 290 } 291 292 if sv.log.GetLevel() <= logging.DebugLevel { 293 sv.log.Debug("state var bundle received", logging.String("from-validator", node), logging.String("state-var", sv.ID), logging.String("event-id", eventID)) 294 } 295 296 if sv.state == ConsensusStatePerfectMatch || sv.state == ConsensusStateconsensusReachedLocked { 297 sv.log.Debug("state var bundle received, consensus already reached", logging.String("from-validator", node), logging.String("state-var", sv.ID), logging.String("event-id", eventID)) 298 return 299 } 300 301 // save the result from the validator and check if we have a quorum 302 sv.validatorResults[node] = bundle 303 304 // calculate how much voting power is required for majority 305 requiredVotingPower := validatorVotesRequired.Mul(num.DecimalFromInt64(sv.top.GetTotalVotingPower())) 306 307 // calculate how much voting power is represented by the voters 308 bundlesVotingPower := num.DecimalZero() 309 for k := range sv.validatorResults { 310 bundlesVotingPower = bundlesVotingPower.Add(num.DecimalFromInt64(sv.top.GetVotingPower(k))) 311 } 312 313 if sv.log.IsDebug() { 314 sv.log.Debug("received results for state variable", logging.String("state-var", sv.ID), logging.String("event-id", eventID), logging.Decimal("received-voting-power", bundlesVotingPower), logging.String("out-of", requiredVotingPower.String())) 315 } 316 317 if bundlesVotingPower.LessThan(requiredVotingPower) { 318 if sv.log.GetLevel() <= logging.DebugLevel { 319 sv.log.Debug("waiting for more results for state variable consensus check", logging.String("state-var", sv.ID), logging.Decimal("received-voting-power", bundlesVotingPower), logging.String("out-of", requiredVotingPower.String())) 320 } 321 return 322 } 323 324 // if we're already in seeking consensus state, no point in checking if all match - suffice checking if there's a majority with matching within 
tolerance 325 if sv.state == ConsensusStateSeekingConsensus { 326 sv.tryConsensusLocked(ctx, rng, requiredVotingPower) 327 return 328 } 329 330 if sv.log.GetLevel() <= logging.DebugLevel { 331 sv.log.Debug("state var checking consensus (2/3 of the results received", logging.String("from-validator", node), logging.String("state-var", sv.ID), logging.String("event-id", eventID)) 332 } 333 334 // we got enough results lets check if they match 335 var result *statevar.KeyValueBundle 336 // check if results from all validator totally agree 337 for nodeID, res := range sv.validatorResults { 338 if result == nil { 339 result = res 340 } 341 if !sv.validatorResults[nodeID].Equals(result) { 342 if sv.log.GetLevel() <= logging.DebugLevel { 343 sv.log.Debug("state var consensus NOT reached through perfect match", logging.String("state-var", sv.ID), logging.String("event-id", eventID), logging.Int("num-results", len(sv.validatorResults))) 344 } 345 346 // initiate a round of voting 347 sv.state = ConsensusStateSeekingConsensus 348 sv.tryConsensusLocked(ctx, rng, requiredVotingPower) 349 return 350 } 351 } 352 353 // we are done - happy days! 354 if sv.log.GetLevel() <= logging.DebugLevel { 355 sv.log.Debug("state var consensus reached through perfect match", logging.String("state-var", sv.ID), logging.String("event-id", eventID), logging.Int("num-results", len(sv.validatorResults))) 356 } 357 sv.state = ConsensusStatePerfectMatch 358 // convert the result to decimal and let the owner of the state variable know 359 sv.consensusReachedLocked(ctx, result) 360 } 361 362 // if the bundles are not all equal to each other, choose one at random and verify that all others are within tolerance. 363 // NB: assumes lock has already been acquired. 
364 func (sv *StateVariable) tryConsensusLocked(ctx context.Context, rng *rand.Rand, requiredVotingPower num.Decimal) { 365 // sort the node IDs for determinism 366 nodeIDs := make([]string, 0, len(sv.validatorResults)) 367 for nodeID := range sv.validatorResults { 368 nodeIDs = append(nodeIDs, nodeID) 369 } 370 sort.Strings(nodeIDs) 371 372 alreadyCheckedForTolerance := map[string]struct{}{} 373 374 for len(alreadyCheckedForTolerance) != len(nodeIDs) { 375 nodeID := nodeIDs[rng.Intn(len(nodeIDs))] 376 if _, ok := alreadyCheckedForTolerance[nodeID]; ok { 377 continue 378 } 379 alreadyCheckedForTolerance[nodeID] = struct{}{} 380 candidateResult := sv.validatorResults[nodeID] 381 votingPowerMatch := num.DecimalZero() 382 for _, nID := range nodeIDs { 383 if sv.validatorResults[nID].WithinTolerance(candidateResult) { 384 votingPowerMatch = votingPowerMatch.Add(num.DecimalFromInt64(sv.top.GetVotingPower(nID))) 385 } 386 } 387 if votingPowerMatch.GreaterThanOrEqual(requiredVotingPower) { 388 sv.state = ConsensusStateconsensusReachedLocked 389 sv.consensusReachedLocked(ctx, candidateResult) 390 return 391 } 392 } 393 394 if sv.log.GetLevel() <= logging.DebugLevel { 395 sv.log.Debug("state var consensus NOT reached through random selection", logging.String("state-var", sv.ID), logging.String("event-id", sv.eventID), logging.Int("num-results", len(sv.validatorResults))) 396 } 397 } 398 399 // consensus was reached either through a vote or through perfect matching of all of 2/3 of the validators. 400 // NB: assumes lock has already been acquired. 
401 func (sv *StateVariable) consensusReachedLocked(ctx context.Context, acceptedValue *statevar.KeyValueBundle) { 402 if sv.log.GetLevel() <= logging.DebugLevel { 403 sv.log.Debug("consensus reached", logging.String("state-var", sv.ID), logging.String("event-id", sv.eventID)) 404 } 405 406 sv.result(ctx, sv.converter.BundleToInterface(acceptedValue)) 407 sv.addEventLocked() 408 409 if sv.log.IsDebug() { 410 sv.log.Debug("consensus reached for state variable", logging.String("state-var", sv.ID), logging.String("event-id", sv.eventID)) 411 } 412 413 // reset the state 414 sv.eventID = "" 415 sv.validatorResults = nil 416 sv.roundsSinceMeaningfulUpdate = 0 417 } 418 419 // addEventLocked adds an event to the pending events. 420 // NB: assumes lock has already been acquired. 421 func (sv *StateVariable) addEventLocked() { 422 sv.pendingEvents = append(sv.pendingEvents, pendingEvent{sv.eventID, stateToName[sv.state]}) 423 } 424 425 type pendingEvent struct { 426 eventID string 427 state string 428 }