code.vegaprotocol.io/vega@v0.79.0/core/statevar/state_variable.go (about)

     1  // Copyright (C) 2023 Gobalsky Labs Limited
     2  //
     3  // This program is free software: you can redistribute it and/or modify
     4  // it under the terms of the GNU Affero General Public License as
     5  // published by the Free Software Foundation, either version 3 of the
     6  // License, or (at your option) any later version.
     7  //
     8  // This program is distributed in the hope that it will be useful,
     9  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    10  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11  // GNU Affero General Public License for more details.
    12  //
    13  // You should have received a copy of the GNU Affero General Public License
    14  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15  
    16  package statevar
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"math/rand"
    22  	"sort"
    23  	"sync"
    24  	"time"
    25  
    26  	"code.vegaprotocol.io/vega/core/events"
    27  	"code.vegaprotocol.io/vega/core/txn"
    28  	"code.vegaprotocol.io/vega/core/types/statevar"
    29  	"code.vegaprotocol.io/vega/libs/num"
    30  	"code.vegaprotocol.io/vega/logging"
    31  	vegapb "code.vegaprotocol.io/vega/protos/vega"
    32  	commandspb "code.vegaprotocol.io/vega/protos/vega/commands/v1"
    33  )
    34  
    35  // ConsensusState trakcs the state transitions of a state variable.
    36  type ConsensusState int
    37  
    38  const (
    39  	ConsensusStateUnspecified ConsensusState = iota
    40  	ConsensusStateCalculationStarted
    41  	ConsensusStatePerfectMatch
    42  	ConsensusStateSeekingConsensus
    43  	ConsensusStateconsensusReachedLocked
    44  	ConsensusStateCalculationAborted
    45  	ConsensusStateError
    46  	ConsensusStateStale
    47  )
    48  
    49  var stateToName = map[ConsensusState]string{
    50  	ConsensusStateUnspecified:            "undefined",
    51  	ConsensusStateCalculationStarted:     "consensus_calc_started",
    52  	ConsensusStatePerfectMatch:           "perfect_match",
    53  	ConsensusStateSeekingConsensus:       "seeking_consensus",
    54  	ConsensusStateconsensusReachedLocked: "consensus_reached",
    55  	ConsensusStateCalculationAborted:     "consensus_calc_aborted",
    56  	ConsensusStateError:                  "error",
    57  }
    58  
    59  type StateVariable struct {
    60  	log              *logging.Logger
    61  	top              Topology
    62  	cmd              Commander
    63  	broker           Broker
    64  	ID               string                                                    // the unique identifier of the state variable
    65  	asset            string                                                    // the asset of the state variable - used for filtering relevant events
    66  	market           string                                                    // the market of the state variable - used for filtering relevant events
    67  	converter        statevar.Converter                                        // convert to/from the key/value bundle model into typed result model
    68  	startCalculation func(string, statevar.FinaliseCalculation)                // a callback to the owner to start the calculation of the value of the state variable
    69  	result           func(context.Context, statevar.StateVariableResult) error // a callback to be called when the value reaches consensus
    70  
    71  	state                       ConsensusState                      // the current status of consensus
    72  	eventID                     string                              // the event ID triggering the calculation
    73  	validatorResults            map[string]*statevar.KeyValueBundle // the result of the calculation as received from validators
    74  	roundsSinceMeaningfulUpdate uint
    75  	pendingEvents               []pendingEvent
    76  	lock                        sync.Mutex
    77  
    78  	currentTime time.Time
    79  
    80  	// use retries to workaround transactions go missing in tendermint
    81  	lastSentSelfBundle     *commandspb.StateVariableProposal
    82  	lastSentSelfBundleTime time.Time
    83  }
    84  
    85  func NewStateVar(
    86  	log *logging.Logger,
    87  	broker Broker,
    88  	top Topology,
    89  	cmd Commander,
    90  	currentTime time.Time,
    91  	ID, asset,
    92  	market string,
    93  	converter statevar.Converter,
    94  	startCalculation func(string, statevar.FinaliseCalculation),
    95  	trigger []statevar.EventType,
    96  	result func(context.Context, statevar.StateVariableResult) error,
    97  ) *StateVariable {
    98  	sv := &StateVariable{
    99  		log:                         log,
   100  		broker:                      broker,
   101  		top:                         top,
   102  		cmd:                         cmd,
   103  		ID:                          ID,
   104  		asset:                       asset,
   105  		market:                      market,
   106  		converter:                   converter,
   107  		startCalculation:            startCalculation,
   108  		result:                      result,
   109  		state:                       ConsensusStateUnspecified,
   110  		validatorResults:            map[string]*statevar.KeyValueBundle{},
   111  		roundsSinceMeaningfulUpdate: 0,
   112  	}
   113  	return sv
   114  }
   115  
   116  // GetAsset returns the asset of the state variable.
   117  func (sv *StateVariable) GetAsset() string {
   118  	return sv.asset
   119  }
   120  
   121  // GetMarket returns the market of the state variable.
   122  func (sv *StateVariable) GetMarket() string {
   123  	return sv.market
   124  }
   125  
   126  // endBlock is called at the end of the block to flush the event. This is snapshot-friendly so that at the end of the block we clear all events as opposed to doing the same at the beginning of the block.
   127  func (sv *StateVariable) endBlock(ctx context.Context) {
   128  	sv.lock.Lock()
   129  	evts := make([]events.Event, 0, len(sv.pendingEvents))
   130  	for _, pending := range sv.pendingEvents {
   131  		newEvt := events.NewStateVarEvent(ctx, sv.ID, pending.eventID, pending.state)
   132  		evts = append(evts, newEvt)
   133  		protoEvt := newEvt.Proto()
   134  		if sv.log.IsDebug() {
   135  			sv.log.Debug("state-var event sent", logging.String("event", protoEvt.String()))
   136  		}
   137  	}
   138  	sv.pendingEvents = []pendingEvent{}
   139  	sv.lock.Unlock()
   140  	sv.broker.SendBatch(evts)
   141  }
   142  
   143  func (sv *StateVariable) startBlock(t time.Time) {
   144  	sv.lock.Lock()
   145  	sv.currentTime = t
   146  
   147  	// if we have an active event, and we sent the bundle and we're 5 seconds after sending the bundle and haven't received our self bundle
   148  	// that means the transaction may have gone missing, let's retry sending it.
   149  	needsResend := false
   150  	if sv.eventID != "" && sv.lastSentSelfBundle != nil && t.After(sv.lastSentSelfBundleTime.Add(5*time.Second)) {
   151  		sv.lastSentSelfBundleTime = t
   152  		needsResend = true
   153  	}
   154  	sv.lock.Unlock()
   155  	if needsResend {
   156  		sv.logAndRetry(errors.New("consensus not reached - timeout expired"), sv.lastSentSelfBundle)
   157  	}
   158  }
   159  
   160  // calculation is required for the state variable for the given event id.
   161  func (sv *StateVariable) eventTriggered(eventID string) {
   162  	sv.lock.Lock()
   163  
   164  	if sv.log.IsDebug() {
   165  		sv.log.Debug("event triggered", logging.String("state-var", sv.ID), logging.String("event-id", eventID))
   166  	}
   167  	// if we get a new event while processing an existing event we abort the current calculation and start a new one
   168  	if sv.eventID != "" {
   169  		if sv.log.GetLevel() <= logging.DebugLevel {
   170  			sv.log.Debug("aborting state variable event", logging.String("state-var", sv.ID), logging.String("aborted-event-id", sv.eventID), logging.String("new-event-id", sv.eventID))
   171  		}
   172  
   173  		// reset the last bundle so we don't send it by mistake
   174  		sv.lastSentSelfBundle = nil
   175  
   176  		// if we got a new event and were not in consensus, increase the number of rounds with no consensus and if
   177  		// we've not had a meaningful update - send an event with stale state
   178  		if sv.state == ConsensusStateSeekingConsensus {
   179  			sv.roundsSinceMeaningfulUpdate++
   180  			if sv.roundsSinceMeaningfulUpdate >= 3 {
   181  				sv.state = ConsensusStateStale
   182  				sv.addEventLocked()
   183  			}
   184  		}
   185  
   186  		sv.state = ConsensusStateCalculationAborted
   187  		sv.addEventLocked()
   188  	}
   189  
   190  	// reset any existing state
   191  	sv.eventID = eventID
   192  	sv.validatorResults = map[string]*statevar.KeyValueBundle{}
   193  	sv.state = ConsensusStateCalculationStarted
   194  	sv.addEventLocked()
   195  
   196  	sv.lock.Unlock()
   197  
   198  	// kickoff calculation
   199  	sv.startCalculation(sv.eventID, sv)
   200  }
   201  
   202  // CalculationFinished is called from the owner when the calculation is completed to kick off consensus.
   203  func (sv *StateVariable) CalculationFinished(eventID string, result statevar.StateVariableResult, err error) {
   204  	sv.lock.Lock()
   205  	if sv.eventID != eventID {
   206  		sv.log.Warn("ignoring recevied the result of a calculation of an old eventID", logging.String("state-var", sv.ID), logging.String("event-id", eventID))
   207  	}
   208  	if err != nil {
   209  		sv.log.Error("could not calculate state for", logging.String("id", sv.ID), logging.String("event-id", eventID))
   210  		sv.state = ConsensusStateError
   211  		sv.addEventLocked()
   212  		sv.eventID = ""
   213  		sv.lock.Unlock()
   214  		return
   215  	}
   216  
   217  	if !sv.top.IsValidator() {
   218  		// if we're a non-validator we still need to do the calculation so that the snapshot will be in sync with
   219  		// a validators, but now we're here we do not need to actually send in our results.
   220  		sv.lock.Unlock()
   221  		return
   222  	}
   223  
   224  	// save our result and send the result to vega to be updated by other nodes.
   225  	kvb := sv.converter.InterfaceToBundle(result).ToProto()
   226  
   227  	// this is a test feature that adds noise up to the tolerance to the state variable
   228  	// it should be excluded by build tag for production
   229  	kvb = sv.AddNoise(kvb)
   230  
   231  	svp := &commandspb.StateVariableProposal{
   232  		Proposal: &vegapb.StateValueProposal{
   233  			StateVarId: sv.ID,
   234  			EventId:    sv.eventID,
   235  			Kvb:        kvb,
   236  		},
   237  	}
   238  
   239  	// set the bundle and the time
   240  	sv.lastSentSelfBundle = svp
   241  	sv.lastSentSelfBundleTime = sv.currentTime
   242  
   243  	// need to release the lock before we send the transaction command
   244  	sv.lock.Unlock()
   245  	sv.cmd.Command(context.Background(), txn.StateVariableProposalCommand, svp, func(_ string, err error) { sv.logAndRetry(err, svp) }, nil)
   246  	if sv.log.GetLevel() <= logging.DebugLevel {
   247  		sv.log.Debug("result calculated and sent to vega", logging.String("validator", sv.top.SelfNodeID()), logging.String("state-var", sv.ID), logging.String("event-id", eventID))
   248  	}
   249  }
   250  
   251  // logAndRetry logs errors from tendermint transaction submission failure and retries if we're still handling the same event.
   252  func (sv *StateVariable) logAndRetry(err error, svp *commandspb.StateVariableProposal) {
   253  	if err == nil {
   254  		return
   255  	}
   256  	sv.lock.Lock()
   257  	sv.log.Error("failed to send state variable proposal command", logging.String("id", sv.ID), logging.String("event-id", sv.eventID), logging.Error(err))
   258  	if svp.Proposal.EventId == sv.eventID {
   259  		sv.lock.Unlock()
   260  		if sv.log.IsDebug() {
   261  			sv.log.Debug("retrying to send state variable proposal command", logging.String("id", sv.ID), logging.String("event-id", sv.eventID))
   262  		}
   263  		sv.cmd.Command(context.Background(), txn.StateVariableProposalCommand, svp, func(_ string, err error) { sv.logAndRetry(err, svp) }, nil)
   264  		return
   265  	}
   266  	sv.lock.Unlock()
   267  }
   268  
   269  // bundleReceived is called when we get a result from another validator corresponding to a given event ID.
   270  func (sv *StateVariable) bundleReceived(ctx context.Context, node, eventID string, bundle *statevar.KeyValueBundle, rng *rand.Rand, validatorVotesRequired num.Decimal) {
   271  	sv.lock.Lock()
   272  	defer sv.lock.Unlock()
   273  
   274  	// if the bundle is received for a stale or wrong event, ignore it
   275  	if sv.eventID != eventID {
   276  		sv.log.Debug("received a result for a stale event", logging.String("ID", sv.ID), logging.String("from-node", node), logging.String("current-even-id", sv.eventID), logging.String("receivedEventID", eventID))
   277  		return
   278  	}
   279  
   280  	// if for some reason we received a result from a non validator node, ignore it
   281  	if !sv.top.IsValidatorVegaPubKey(node) {
   282  		sv.log.Debug("state var bundle received from a non validator node - ignoring", logging.String("from-validator", node), logging.String("state-var", sv.ID), logging.String("eventID", eventID))
   283  		return
   284  	}
   285  
   286  	if sv.top.SelfNodeID() == node {
   287  		sv.lastSentSelfBundle = nil
   288  		sv.lastSentSelfBundleTime = time.Time{}
   289  		sv.log.Debug("state var bundle received self vote", logging.String("from-validator", node), logging.String("state-var", sv.ID), logging.String("eventID", eventID))
   290  	}
   291  
   292  	if sv.log.GetLevel() <= logging.DebugLevel {
   293  		sv.log.Debug("state var bundle received", logging.String("from-validator", node), logging.String("state-var", sv.ID), logging.String("event-id", eventID))
   294  	}
   295  
   296  	if sv.state == ConsensusStatePerfectMatch || sv.state == ConsensusStateconsensusReachedLocked {
   297  		sv.log.Debug("state var bundle received, consensus already reached", logging.String("from-validator", node), logging.String("state-var", sv.ID), logging.String("event-id", eventID))
   298  		return
   299  	}
   300  
   301  	// save the result from the validator and check if we have a quorum
   302  	sv.validatorResults[node] = bundle
   303  
   304  	// calculate how much voting power is required for majority
   305  	requiredVotingPower := validatorVotesRequired.Mul(num.DecimalFromInt64(sv.top.GetTotalVotingPower()))
   306  
   307  	// calculate how much voting power is represented by the voters
   308  	bundlesVotingPower := num.DecimalZero()
   309  	for k := range sv.validatorResults {
   310  		bundlesVotingPower = bundlesVotingPower.Add(num.DecimalFromInt64(sv.top.GetVotingPower(k)))
   311  	}
   312  
   313  	if sv.log.IsDebug() {
   314  		sv.log.Debug("received results for state variable", logging.String("state-var", sv.ID), logging.String("event-id", eventID), logging.Decimal("received-voting-power", bundlesVotingPower), logging.String("out-of", requiredVotingPower.String()))
   315  	}
   316  
   317  	if bundlesVotingPower.LessThan(requiredVotingPower) {
   318  		if sv.log.GetLevel() <= logging.DebugLevel {
   319  			sv.log.Debug("waiting for more results for state variable consensus check", logging.String("state-var", sv.ID), logging.Decimal("received-voting-power", bundlesVotingPower), logging.String("out-of", requiredVotingPower.String()))
   320  		}
   321  		return
   322  	}
   323  
   324  	// if we're already in seeking consensus state, no point in checking if all match - suffice checking if there's a majority with matching within tolerance
   325  	if sv.state == ConsensusStateSeekingConsensus {
   326  		sv.tryConsensusLocked(ctx, rng, requiredVotingPower)
   327  		return
   328  	}
   329  
   330  	if sv.log.GetLevel() <= logging.DebugLevel {
   331  		sv.log.Debug("state var checking consensus (2/3 of the results received", logging.String("from-validator", node), logging.String("state-var", sv.ID), logging.String("event-id", eventID))
   332  	}
   333  
   334  	// we got enough results lets check if they match
   335  	var result *statevar.KeyValueBundle
   336  	// check if results from all validator totally agree
   337  	for nodeID, res := range sv.validatorResults {
   338  		if result == nil {
   339  			result = res
   340  		}
   341  		if !sv.validatorResults[nodeID].Equals(result) {
   342  			if sv.log.GetLevel() <= logging.DebugLevel {
   343  				sv.log.Debug("state var consensus NOT reached through perfect match", logging.String("state-var", sv.ID), logging.String("event-id", eventID), logging.Int("num-results", len(sv.validatorResults)))
   344  			}
   345  
   346  			// initiate a round of voting
   347  			sv.state = ConsensusStateSeekingConsensus
   348  			sv.tryConsensusLocked(ctx, rng, requiredVotingPower)
   349  			return
   350  		}
   351  	}
   352  
   353  	// we are done - happy days!
   354  	if sv.log.GetLevel() <= logging.DebugLevel {
   355  		sv.log.Debug("state var consensus reached through perfect match", logging.String("state-var", sv.ID), logging.String("event-id", eventID), logging.Int("num-results", len(sv.validatorResults)))
   356  	}
   357  	sv.state = ConsensusStatePerfectMatch
   358  	// convert the result to decimal and let the owner of the state variable know
   359  	sv.consensusReachedLocked(ctx, result)
   360  }
   361  
   362  // if the bundles are not all equal to each other, choose one at random and verify that all others are within tolerance.
   363  // NB: assumes lock has already been acquired.
   364  func (sv *StateVariable) tryConsensusLocked(ctx context.Context, rng *rand.Rand, requiredVotingPower num.Decimal) {
   365  	// sort the node IDs for determinism
   366  	nodeIDs := make([]string, 0, len(sv.validatorResults))
   367  	for nodeID := range sv.validatorResults {
   368  		nodeIDs = append(nodeIDs, nodeID)
   369  	}
   370  	sort.Strings(nodeIDs)
   371  
   372  	alreadyCheckedForTolerance := map[string]struct{}{}
   373  
   374  	for len(alreadyCheckedForTolerance) != len(nodeIDs) {
   375  		nodeID := nodeIDs[rng.Intn(len(nodeIDs))]
   376  		if _, ok := alreadyCheckedForTolerance[nodeID]; ok {
   377  			continue
   378  		}
   379  		alreadyCheckedForTolerance[nodeID] = struct{}{}
   380  		candidateResult := sv.validatorResults[nodeID]
   381  		votingPowerMatch := num.DecimalZero()
   382  		for _, nID := range nodeIDs {
   383  			if sv.validatorResults[nID].WithinTolerance(candidateResult) {
   384  				votingPowerMatch = votingPowerMatch.Add(num.DecimalFromInt64(sv.top.GetVotingPower(nID)))
   385  			}
   386  		}
   387  		if votingPowerMatch.GreaterThanOrEqual(requiredVotingPower) {
   388  			sv.state = ConsensusStateconsensusReachedLocked
   389  			sv.consensusReachedLocked(ctx, candidateResult)
   390  			return
   391  		}
   392  	}
   393  
   394  	if sv.log.GetLevel() <= logging.DebugLevel {
   395  		sv.log.Debug("state var consensus NOT reached through random selection", logging.String("state-var", sv.ID), logging.String("event-id", sv.eventID), logging.Int("num-results", len(sv.validatorResults)))
   396  	}
   397  }
   398  
   399  // consensus was reached either through a vote or through perfect matching of all of 2/3 of the validators.
   400  // NB: assumes lock has already been acquired.
   401  func (sv *StateVariable) consensusReachedLocked(ctx context.Context, acceptedValue *statevar.KeyValueBundle) {
   402  	if sv.log.GetLevel() <= logging.DebugLevel {
   403  		sv.log.Debug("consensus reached", logging.String("state-var", sv.ID), logging.String("event-id", sv.eventID))
   404  	}
   405  
   406  	sv.result(ctx, sv.converter.BundleToInterface(acceptedValue))
   407  	sv.addEventLocked()
   408  
   409  	if sv.log.IsDebug() {
   410  		sv.log.Debug("consensus reached for state variable", logging.String("state-var", sv.ID), logging.String("event-id", sv.eventID))
   411  	}
   412  
   413  	// reset the state
   414  	sv.eventID = ""
   415  	sv.validatorResults = nil
   416  	sv.roundsSinceMeaningfulUpdate = 0
   417  }
   418  
   419  // addEventLocked adds an event to the pending events.
   420  // NB: assumes lock has already been acquired.
   421  func (sv *StateVariable) addEventLocked() {
   422  	sv.pendingEvents = append(sv.pendingEvents, pendingEvent{sv.eventID, stateToName[sv.state]})
   423  }
   424  
   425  type pendingEvent struct {
   426  	eventID string
   427  	state   string
   428  }