code.vegaprotocol.io/vega@v0.79.0/core/checkpoint/engine.go (about)

     1  // Copyright (C) 2023 Gobalsky Labs Limited
     2  //
     3  // This program is free software: you can redistribute it and/or modify
     4  // it under the terms of the GNU Affero General Public License as
     5  // published by the Free Software Foundation, either version 3 of the
     6  // License, or (at your option) any later version.
     7  //
     8  // This program is distributed in the hope that it will be useful,
     9  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    10  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11  // GNU Affero General Public License for more details.
    12  //
    13  // You should have received a copy of the GNU Affero General Public License
    14  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15  
    16  package checkpoint
    17  
    18  import (
    19  	"bytes"
    20  	"context"
    21  	"encoding/base64"
    22  	"encoding/hex"
    23  	"errors"
    24  	"fmt"
    25  	"os"
    26  	"time"
    27  
    28  	"code.vegaprotocol.io/vega/core/types"
    29  	vegactx "code.vegaprotocol.io/vega/libs/context"
    30  	"code.vegaprotocol.io/vega/logging"
    31  	"code.vegaprotocol.io/vega/paths"
    32  )
    33  
var (
	// Sentinel errors returned by the checkpoint engine:
	//   - ErrUnknownCheckpointName: a checkpoint being loaded contains data for a
	//     component that has not been registered with the engine.
	//   - ErrComponentWithDuplicateName: two different components were registered
	//     under the same checkpoint name.
	//   - ErrNoCheckpointExpectedToBeRestored: a checkpoint was offered for
	//     validation while the engine has no restore hash set.
	//   - ErrIncompatibleHashes: the hash of the offered checkpoint does not match
	//     the restore hash the engine expects.
	ErrUnknownCheckpointName            = errors.New("component for checkpoint not registered")
	ErrComponentWithDuplicateName       = errors.New("multiple components with the same name")
	ErrNoCheckpointExpectedToBeRestored = errors.New("no checkpoint expected to be restored")
	ErrIncompatibleHashes               = errors.New("incompatible hashes")

	// cpOrder is the order in which components are visited, both when a
	// checkpoint is created and when one is loaded. Components appearing later
	// in the list may rely on the ones before them having been restored, so the
	// order of the entries matters.
	cpOrder = []types.CheckpointName{
		types.ValidatorsCheckpoint,            // validators information
		types.AssetsCheckpoint,                // assets are required for collateral to work, and the vote asset needs to be restored
		types.CollateralCheckpoint,            // without balances, governance (proposals, bonds) is difficult
		types.NetParamsCheckpoint,             // net params should go right after assets and collateral, so vote tokens are restored
		types.MarketActivityTrackerCheckpoint, // restore market activity information - needs to happen before governance
		types.ExecutionCheckpoint,             // we should have the parent market state restored before we start loading governance, so successor markets can inherit the correct state
		types.GovernanceCheckpoint,            // depends on all of the above
		types.EpochCheckpoint,                 // restore epoch information... so delegation sequence IDs make sense
		types.MultisigControlCheckpoint,       // multisig control state; NOTE(review): ordering dependency is not documented - presumably needed before staking, confirm
		types.StakingCheckpoint,               // restore the staking information, so delegation makes sense
		types.DelegationCheckpoint,
		types.PendingRewardsCheckpoint, // pending rewards can basically be reloaded any time
		types.BankingCheckpoint,        // banking checkpoint needs to be reloaded some time after collateral

	}
)
    57  
// State is implemented by every system component that takes part in
// checkpointing.
//
//   - Name returns the component name; the engine uses it as the key in its
//     component map and to pick the matching section of a checkpoint.
//   - Checkpoint returns the component's checkpoint data as raw bytes, or an
//     error if the data could not be produced.
//   - Load restores the component from the raw bytes previously produced for
//     it, returning an error if the data cannot be applied.
//
//go:generate go run github.com/golang/mock/mockgen -destination mocks/state_mock.go -package mocks code.vegaprotocol.io/vega/core/checkpoint State
type State interface {
	Name() types.CheckpointName
	Checkpoint() ([]byte, error)
	Load(ctx context.Context, checkpoint []byte) error
}
    69  
// AssetsState is a State that can additionally report the assets that were
// enabled when its checkpoint was reloaded. It is a somewhat hacky hook: while
// loading a checkpoint the engine collects these assets so that they can be
// enabled in the collateral component before the collateral data is loaded.
//
//go:generate go run github.com/golang/mock/mockgen -destination mocks/assets_state_mock.go -package mocks code.vegaprotocol.io/vega/core/checkpoint AssetsState
type AssetsState interface {
	State
	// GetEnabledAssets returns the assets to enable in collateral; the engine
	// calls it right after a successful Load of this component.
	GetEnabledAssets() []*types.Asset
}
    77  
// CollateralState is the second half of the asset-enabling hook: a State that
// can have assets enabled on it. While loading a checkpoint the engine calls
// EnableAsset for every asset reported by the AssetsState component before it
// loads the collateral data itself.
//
//go:generate go run github.com/golang/mock/mockgen -destination mocks/collateral_state_mock.go -package mocks code.vegaprotocol.io/vega/core/checkpoint CollateralState
type CollateralState interface {
	State
	// EnableAsset enables the given asset. The engine treats a returned error
	// as "asset already enabled": it is logged at debug level and ignored.
	EnableAsset(ctx context.Context, asset types.Asset) error
}
    85  
// Engine coordinates checkpointing across the registered components: it
// decides when a checkpoint is due, gathers the data of every component into a
// single checkpoint, and restores components from a checkpoint supplied at
// genesis.
type Engine struct {
	log *logging.Logger

	// components holds the registered components keyed by their checkpoint name.
	// loadHash is the hash of the checkpoint expected to be restored; it is set
	// from the genesis state and cleared once a matching checkpoint is accepted.
	// nextCP is the time at or after which the next periodic checkpoint is
	// taken; the zero value means no checkpoint has been scheduled yet.
	// delta is the interval between periodic checkpoints.
	components map[types.CheckpointName]State
	loadHash   []byte
	nextCP     time.Time
	delta      time.Duration

	// snapshot fields
	// NOTE(review): apart from state being initialised in New, these fields are
	// not used in this file - presumably they are managed by the snapshot
	// related code of this package; confirm there.
	state   *types.PayloadCheckpoint
	data    []byte
	updated bool
	snapErr error
	poll    chan struct{}

	// onCheckpointLoadedCB, when non-nil, is invoked once a checkpoint has been
	// loaded, or once it is known at genesis that there is none to load.
	onCheckpointLoadedCB func(context.Context)
}
   103  
   104  func New(log *logging.Logger, cfg Config, components ...State) (*Engine, error) {
   105  	log = log.Named(namedLogger)
   106  	log.SetLevel(cfg.Level.Get())
   107  
   108  	e := &Engine{
   109  		log:        log,
   110  		components: make(map[types.CheckpointName]State, len(components)),
   111  		nextCP:     time.Time{},
   112  		state: &types.PayloadCheckpoint{
   113  			Checkpoint: &types.CPState{},
   114  		},
   115  	}
   116  	for _, c := range components {
   117  		if err := e.addComponent(c); err != nil {
   118  			return nil, err
   119  		}
   120  	}
   121  	return e, nil
   122  }
   123  
// RegisterOnCheckpointLoaded sets the callback invoked once a checkpoint has
// been loaded successfully, or once UponGenesis has established that there is
// no checkpoint to load. Only a single callback is kept: registering a new one
// replaces the previous one.
func (e *Engine) RegisterOnCheckpointLoaded(f func(context.Context)) {
	e.onCheckpointLoadedCB = f
}
   127  
   128  func (e *Engine) UponGenesis(ctx context.Context, data []byte) (err error) {
   129  	e.log.Debug("Entering checkpoint.Engine.UponGenesis")
   130  	defer func() {
   131  		if err != nil {
   132  			e.log.Debug("Failure in checkpoint.Engine.UponGenesis", logging.Error(err))
   133  		} else {
   134  			e.log.Debug("Leaving checkpoint.Engine.UponGenesis without error")
   135  		}
   136  	}()
   137  
   138  	state, err := LoadGenesisState(data)
   139  	if err != nil {
   140  		return err
   141  	}
   142  
   143  	// first is there a hash
   144  	if state != nil && len(state.CheckpointHash) != 0 {
   145  		e.loadHash, err = hex.DecodeString(state.CheckpointHash)
   146  		e.log.Warn("Checkpoint restore enabled",
   147  			logging.String("checkpoint-hash-str", state.CheckpointHash),
   148  			logging.String("checkpoint-hex-encoded", hex.EncodeToString(e.loadHash)),
   149  		)
   150  		if err != nil {
   151  			e.loadHash = nil
   152  			e.log.Panic("Malformed restore hash in genesis file",
   153  				logging.Error(err),
   154  			)
   155  		}
   156  	}
   157  
   158  	// a hash is set to be loaded
   159  	if len(e.loadHash) > 0 {
   160  		// no loadHash but a state specified.
   161  		if len(state.CheckpointHash) <= 0 {
   162  			e.log.Panic("invalid genesis file, hash specified without state")
   163  		}
   164  
   165  		buf, err := base64.StdEncoding.DecodeString(state.CheckpointState)
   166  		if err != nil {
   167  			return fmt.Errorf("invalid genesis file checkpoint.state: %w", err)
   168  		}
   169  
   170  		cpt := &types.CheckpointState{}
   171  		if err := cpt.SetState(buf); err != nil {
   172  			return fmt.Errorf("invalid restore checkpoint command: %w", err)
   173  		}
   174  
   175  		// now we can proceed with loading it.
   176  		if err := e.load(ctx, cpt); err != nil {
   177  			return fmt.Errorf("could not load checkpoint: %w", err)
   178  		}
   179  	}
   180  
   181  	// if state nil, no checkpoint to load, let's just call
   182  	// the onCheckPointloaded stuff to notify engine they don't have to wait for a
   183  	// checkpoint to get in business
   184  	if state == nil || len(state.CheckpointHash) <= 0 {
   185  		e.onCheckpointLoaded(ctx)
   186  	}
   187  
   188  	return nil
   189  }
   190  
   191  // Add used to add/register components after the engine has been instantiated already
   192  // this is mainly used to make testing easier.
   193  func (e *Engine) Add(comps ...State) error {
   194  	for _, c := range comps {
   195  		if err := e.addComponent(c); err != nil {
   196  			return err
   197  		}
   198  	}
   199  	return nil
   200  }
   201  
   202  // add component, but check for duplicate names.
   203  func (e *Engine) addComponent(comp State) error {
   204  	name := comp.Name()
   205  	c, ok := e.components[name]
   206  	if !ok {
   207  		e.components[name] = comp
   208  		return nil
   209  	}
   210  	if c != comp {
   211  		return ErrComponentWithDuplicateName
   212  	}
   213  	// component was registered already
   214  	return nil
   215  }
   216  
   217  // BalanceCheckpoint is used for deposits and withdrawals. We want a checkpoint to be taken in those events
   218  // but these checkpoints should not affect the timing (delta, time between checkpoints). Currently, this call
   219  // generates a full checkpoint, but we probably will change this to be a sparse checkpoint
   220  // only containing changes in balances and (perhaps) network parameters...
   221  func (e *Engine) BalanceCheckpoint(ctx context.Context) (*types.CheckpointState, error) {
   222  	// no time stuff here, for now we're just taking a full checkpoint
   223  	cp := e.makeCheckpoint(ctx)
   224  	return cp, nil
   225  }
   226  
   227  // Checkpoint returns the overall checkpoint.
   228  func (e *Engine) Checkpoint(ctx context.Context, t time.Time) (*types.CheckpointState, error) {
   229  	// start time will be zero -> add delta to this time, and return
   230  
   231  	if e.nextCP.IsZero() {
   232  		e.setNextCP(t.Add(e.delta))
   233  		return nil, nil
   234  	}
   235  	if e.nextCP.After(t) {
   236  		return nil, nil
   237  	}
   238  	e.setNextCP(t.Add(e.delta))
   239  	cp := e.makeCheckpoint(ctx)
   240  	return cp, nil
   241  }
   242  
   243  func (e *Engine) makeCheckpoint(ctx context.Context) *types.CheckpointState {
   244  	cp := &types.Checkpoint{}
   245  	for _, k := range cpOrder {
   246  		comp, ok := e.components[k]
   247  		if !ok {
   248  			continue
   249  		}
   250  		data, err := comp.Checkpoint()
   251  		if err != nil {
   252  			panic(fmt.Errorf("failed to generate checkpoint: %w", err))
   253  		}
   254  		// set the correct field
   255  		cp.Set(k, data)
   256  	}
   257  	// add block height to checkpoint
   258  	h, _ := vegactx.BlockHeightFromContext(ctx)
   259  	if err := cp.SetBlockHeight(int64(h)); err != nil {
   260  		e.log.Panic("could not set block height", logging.Error(err))
   261  	}
   262  	cpState := &types.CheckpointState{}
   263  	// setCheckpoint hides the vega type mess
   264  	if err := cpState.SetCheckpoint(cp); err != nil {
   265  		panic(fmt.Errorf("checkpoint could not be created: %w", err))
   266  	}
   267  
   268  	e.log.Debug("checkpoint taken", logging.Uint64("block-height", h))
   269  	return cpState
   270  }
   271  
// load restores all registered components from the checkpoint held by cpt.
//
// The checkpoint is only accepted when the engine expects one and its hash
// matches the expected hash (see ValidateCheckpoint). Components are then
// restored in cpOrder, skipping the names for which the checkpoint holds no
// data. Once every component has been loaded, the checkpoint-loaded callback
// is invoked.
//
// It returns an error if the checkpoint is not the expected one, cannot be
// decoded or validated, contains data for an unregistered component
// (ErrUnknownCheckpointName), or if a component fails to load its data.
func (e *Engine) load(ctx context.Context, cpt *types.CheckpointState) error {
	// purely informational: report the expected and received hashes, at warn
	// level when they differ. The actual check happens in ValidateCheckpoint.
	if len(e.loadHash) != 0 {
		hashDiff := bytes.Compare(e.loadHash, cpt.Hash)

		log := e.log.Info
		if hashDiff != 0 {
			log = e.log.Warn
		}
		log("Checkpoint hash reload requested",
			logging.String("hash-to-load", hex.EncodeToString(e.loadHash)),
			logging.String("checkpoint-hash", hex.EncodeToString(cpt.Hash)),
			logging.Int("hash-diff", hashDiff),
		)
	}

	if err := e.ValidateCheckpoint(cpt); err != nil {
		return err
	}
	// we found the checkpoint we need to load, set value to nil
	// either the checkpoint was loaded successfully, or it wasn't
	// if this fails, the node goes down
	// (clearing the hash here also means no further checkpoint is accepted,
	// whatever the outcome of the steps below)
	e.loadHash = nil
	cp, err := cpt.GetCheckpoint()
	if err != nil {
		return err
	}
	// check the hash
	if err := cpt.Validate(); err != nil {
		return err
	}
	var (
		assets                 []*types.Asset
		doneAssets, doneCollat bool // just avoids type asserting all components
	)
	for _, k := range cpOrder {
		cpData := cp.Get(k)
		// nothing in the checkpoint for this component
		if len(cpData) == 0 {
			continue
		}
		c, ok := e.components[k]
		if !ok {
			return ErrUnknownCheckpointName // data cannot be restored
		}
		// the first component implementing AssetsState is loaded here, and the
		// assets it reports as enabled are kept for the collateral component
		if !doneAssets {
			if ac, ok := c.(AssetsState); ok {
				if err := c.Load(ctx, cpData); err != nil {
					return err
				}
				assets = ac.GetEnabledAssets()
				doneAssets = true
				continue
			}
		}
		// first enable assets, then load the state
		if !doneCollat {
			if cc, ok := c.(CollateralState); ok {
				for _, a := range assets {
					// ignore this error, if the asset is already enabled, that's fine
					// we can carry on as though nothing happened
					if err := cc.EnableAsset(ctx, *a); err != nil {
						e.log.Debug("Asset already enabled",
							logging.String("asset-id", a.ID),
							logging.Error(err),
						)
					}
				}
				doneCollat = true
			}
		}
		// regular components, and collateral once its assets are enabled
		if err := c.Load(ctx, cpData); err != nil {
			return err
		}
	}

	// seems like we went through it all without issue
	// we can execute the callback
	e.onCheckpointLoaded(ctx)

	return nil
}
   353  
   354  func (e *Engine) ValidateCheckpoint(cpt *types.CheckpointState) error {
   355  	// if no hash was specified, or the hash doesn't match, then don't even attempt to load the checkpoint
   356  	if e.loadHash == nil {
   357  		return ErrNoCheckpointExpectedToBeRestored
   358  	}
   359  	if !bytes.Equal(e.loadHash, cpt.Hash) {
   360  		return fmt.Errorf("received(%v), expected(%v): %w", hex.EncodeToString(cpt.Hash), hex.EncodeToString(e.loadHash), ErrIncompatibleHashes)
   361  	}
   362  	return nil
   363  }
   364  
   365  func (e *Engine) OnTimeElapsedUpdate(ctx context.Context, d time.Duration) error {
   366  	if !e.nextCP.IsZero() {
   367  		// update the time for the next cp
   368  		e.setNextCP(e.nextCP.Add(-e.delta).Add(d))
   369  	}
   370  	// update delta
   371  	e.delta = d
   372  	return nil
   373  }
   374  
   375  // onCheckpointLoaded will call the OnCheckpointLoaded method for
   376  // all checkpoint providers (if it exists).
   377  func (e *Engine) onCheckpointLoaded(ctx context.Context) {
   378  	if e.onCheckpointLoadedCB != nil {
   379  		e.onCheckpointLoadedCB(ctx)
   380  	}
   381  }
   382  
   383  func RemoveAll(vegaPaths paths.Paths) error {
   384  	dbDirectory := vegaPaths.StatePathFor(paths.CheckpointStateHome)
   385  
   386  	if err := os.RemoveAll(dbDirectory); err != nil {
   387  		return fmt.Errorf("an error occurred while removing directory %q: %w", dbDirectory, err)
   388  	}
   389  
   390  	return nil
   391  }