// Source: code.vegaprotocol.io/vega@v0.79.0/core/checkpoint/engine.go

// Copyright (C) 2023 Gobalsky Labs Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package checkpoint

import (
	"bytes"
	"context"
	"encoding/base64"
	"encoding/hex"
	"errors"
	"fmt"
	"os"
	"time"

	"code.vegaprotocol.io/vega/core/types"
	vegactx "code.vegaprotocol.io/vega/libs/context"
	"code.vegaprotocol.io/vega/logging"
	"code.vegaprotocol.io/vega/paths"
)

var (
	// ErrUnknownCheckpointName is returned when a checkpoint carries data for a
	// component that has not been registered with the engine.
	ErrUnknownCheckpointName = errors.New("component for checkpoint not registered")
	// ErrComponentWithDuplicateName is returned when a different component is
	// registered under a name that is already taken.
	ErrComponentWithDuplicateName = errors.New("multiple components with the same name")
	// ErrNoCheckpointExpectedToBeRestored is returned when a checkpoint is offered
	// but the engine holds no hash to restore from.
	ErrNoCheckpointExpectedToBeRestored = errors.New("no checkpoint expected to be restored")
	// ErrIncompatibleHashes is returned when the checkpoint hash differs from the
	// hash the engine expects.
	ErrIncompatibleHashes = errors.New("incompatible hashes")

	// cpOrder is the order in which components are visited, both when a
	// checkpoint is taken and when one is restored.
	cpOrder = []types.CheckpointName{
		types.ValidatorsCheckpoint,            // validators information
		types.AssetsCheckpoint,                // assets are required for collateral to work, and the vote asset needs to be restored
		types.CollateralCheckpoint,            // without balances, governance (proposals, bonds) are difficult
		types.NetParamsCheckpoint,             // net params should go right after assets and collateral, so vote tokens are restored
		types.MarketActivityTrackerCheckpoint, // restore market activity information - needs to happen before governance
		types.ExecutionCheckpoint,             // we should have the parent market state restored before we start loading governance, so successor markets can inherit the correct state
		types.GovernanceCheckpoint,            // depends on all of the above
		types.EpochCheckpoint,                 // restore epoch information... so delegation sequence IDs make sense
		types.MultisigControlCheckpoint,       // NOTE(review): the original comment here was a copy of the staking one below - confirm the actual ordering rationale
		types.StakingCheckpoint,               // restore the staking information, so delegation makes sense
		types.DelegationCheckpoint,
		types.PendingRewardsCheckpoint, // pending rewards can basically be reloaded any time
		types.BankingCheckpoint,        // banking checkpoint needs to be reloaded any time after collateral
	}
)

// State interface represents system components that need checkpointing.
// Name returns the component name (key in engine map).
// Checkpoint returns the serialised component state to include in a checkpoint.
// Load restores the component from the serialised state taken from a checkpoint.
//
//go:generate go run github.com/golang/mock/mockgen -destination mocks/state_mock.go -package mocks code.vegaprotocol.io/vega/core/checkpoint State
type State interface {
	Name() types.CheckpointName
	Checkpoint() ([]byte, error)
	Load(ctx context.Context, checkpoint []byte) error
}

// AssetsState is a bit of a hacky way to get the assets that were enabled when the checkpoint was reloaded, so we can enable them in the collateral engine.
//
//go:generate go run github.com/golang/mock/mockgen -destination mocks/assets_state_mock.go -package mocks code.vegaprotocol.io/vega/core/checkpoint AssetsState
type AssetsState interface {
	State
	GetEnabledAssets() []*types.Asset
}

// CollateralState is part 2 of the hacky way to enable the assets required to load the collateral state.
//
//go:generate go run github.com/golang/mock/mockgen -destination mocks/collateral_state_mock.go -package mocks code.vegaprotocol.io/vega/core/checkpoint CollateralState
type CollateralState interface { 82 State 83 EnableAsset(ctx context.Context, asset types.Asset) error 84 } 85 86 type Engine struct { 87 log *logging.Logger 88 89 components map[types.CheckpointName]State 90 loadHash []byte 91 nextCP time.Time 92 delta time.Duration 93 94 // snapshot fields 95 state *types.PayloadCheckpoint 96 data []byte 97 updated bool 98 snapErr error 99 poll chan struct{} 100 101 onCheckpointLoadedCB func(context.Context) 102 } 103 104 func New(log *logging.Logger, cfg Config, components ...State) (*Engine, error) { 105 log = log.Named(namedLogger) 106 log.SetLevel(cfg.Level.Get()) 107 108 e := &Engine{ 109 log: log, 110 components: make(map[types.CheckpointName]State, len(components)), 111 nextCP: time.Time{}, 112 state: &types.PayloadCheckpoint{ 113 Checkpoint: &types.CPState{}, 114 }, 115 } 116 for _, c := range components { 117 if err := e.addComponent(c); err != nil { 118 return nil, err 119 } 120 } 121 return e, nil 122 } 123 124 func (e *Engine) RegisterOnCheckpointLoaded(f func(context.Context)) { 125 e.onCheckpointLoadedCB = f 126 } 127 128 func (e *Engine) UponGenesis(ctx context.Context, data []byte) (err error) { 129 e.log.Debug("Entering checkpoint.Engine.UponGenesis") 130 defer func() { 131 if err != nil { 132 e.log.Debug("Failure in checkpoint.Engine.UponGenesis", logging.Error(err)) 133 } else { 134 e.log.Debug("Leaving checkpoint.Engine.UponGenesis without error") 135 } 136 }() 137 138 state, err := LoadGenesisState(data) 139 if err != nil { 140 return err 141 } 142 143 // first is there a hash 144 if state != nil && len(state.CheckpointHash) != 0 { 145 e.loadHash, err = hex.DecodeString(state.CheckpointHash) 146 e.log.Warn("Checkpoint restore enabled", 147 logging.String("checkpoint-hash-str", state.CheckpointHash), 148 logging.String("checkpoint-hex-encoded", hex.EncodeToString(e.loadHash)), 149 ) 150 if err != nil { 151 e.loadHash = nil 152 e.log.Panic("Malformed restore hash in genesis file", 153 logging.Error(err), 154 ) 
155 } 156 } 157 158 // a hash is set to be loaded 159 if len(e.loadHash) > 0 { 160 // no loadHash but a state specified. 161 if len(state.CheckpointHash) <= 0 { 162 e.log.Panic("invalid genesis file, hash specified without state") 163 } 164 165 buf, err := base64.StdEncoding.DecodeString(state.CheckpointState) 166 if err != nil { 167 return fmt.Errorf("invalid genesis file checkpoint.state: %w", err) 168 } 169 170 cpt := &types.CheckpointState{} 171 if err := cpt.SetState(buf); err != nil { 172 return fmt.Errorf("invalid restore checkpoint command: %w", err) 173 } 174 175 // now we can proceed with loading it. 176 if err := e.load(ctx, cpt); err != nil { 177 return fmt.Errorf("could not load checkpoint: %w", err) 178 } 179 } 180 181 // if state nil, no checkpoint to load, let's just call 182 // the onCheckPointloaded stuff to notify engine they don't have to wait for a 183 // checkpoint to get in business 184 if state == nil || len(state.CheckpointHash) <= 0 { 185 e.onCheckpointLoaded(ctx) 186 } 187 188 return nil 189 } 190 191 // Add used to add/register components after the engine has been instantiated already 192 // this is mainly used to make testing easier. 193 func (e *Engine) Add(comps ...State) error { 194 for _, c := range comps { 195 if err := e.addComponent(c); err != nil { 196 return err 197 } 198 } 199 return nil 200 } 201 202 // add component, but check for duplicate names. 203 func (e *Engine) addComponent(comp State) error { 204 name := comp.Name() 205 c, ok := e.components[name] 206 if !ok { 207 e.components[name] = comp 208 return nil 209 } 210 if c != comp { 211 return ErrComponentWithDuplicateName 212 } 213 // component was registered already 214 return nil 215 } 216 217 // BalanceCheckpoint is used for deposits and withdrawals. We want a checkpoint to be taken in those events 218 // but these checkpoints should not affect the timing (delta, time between checkpoints). 
Currently, this call 219 // generates a full checkpoint, but we probably will change this to be a sparse checkpoint 220 // only containing changes in balances and (perhaps) network parameters... 221 func (e *Engine) BalanceCheckpoint(ctx context.Context) (*types.CheckpointState, error) { 222 // no time stuff here, for now we're just taking a full checkpoint 223 cp := e.makeCheckpoint(ctx) 224 return cp, nil 225 } 226 227 // Checkpoint returns the overall checkpoint. 228 func (e *Engine) Checkpoint(ctx context.Context, t time.Time) (*types.CheckpointState, error) { 229 // start time will be zero -> add delta to this time, and return 230 231 if e.nextCP.IsZero() { 232 e.setNextCP(t.Add(e.delta)) 233 return nil, nil 234 } 235 if e.nextCP.After(t) { 236 return nil, nil 237 } 238 e.setNextCP(t.Add(e.delta)) 239 cp := e.makeCheckpoint(ctx) 240 return cp, nil 241 } 242 243 func (e *Engine) makeCheckpoint(ctx context.Context) *types.CheckpointState { 244 cp := &types.Checkpoint{} 245 for _, k := range cpOrder { 246 comp, ok := e.components[k] 247 if !ok { 248 continue 249 } 250 data, err := comp.Checkpoint() 251 if err != nil { 252 panic(fmt.Errorf("failed to generate checkpoint: %w", err)) 253 } 254 // set the correct field 255 cp.Set(k, data) 256 } 257 // add block height to checkpoint 258 h, _ := vegactx.BlockHeightFromContext(ctx) 259 if err := cp.SetBlockHeight(int64(h)); err != nil { 260 e.log.Panic("could not set block height", logging.Error(err)) 261 } 262 cpState := &types.CheckpointState{} 263 // setCheckpoint hides the vega type mess 264 if err := cpState.SetCheckpoint(cp); err != nil { 265 panic(fmt.Errorf("checkpoint could not be created: %w", err)) 266 } 267 268 e.log.Debug("checkpoint taken", logging.Uint64("block-height", h)) 269 return cpState 270 } 271 272 // load - loads checkpoint data for all components by name. 
273 func (e *Engine) load(ctx context.Context, cpt *types.CheckpointState) error { 274 if len(e.loadHash) != 0 { 275 hashDiff := bytes.Compare(e.loadHash, cpt.Hash) 276 277 log := e.log.Info 278 if hashDiff != 0 { 279 log = e.log.Warn 280 } 281 log("Checkpoint hash reload requested", 282 logging.String("hash-to-load", hex.EncodeToString(e.loadHash)), 283 logging.String("checkpoint-hash", hex.EncodeToString(cpt.Hash)), 284 logging.Int("hash-diff", hashDiff), 285 ) 286 } 287 288 if err := e.ValidateCheckpoint(cpt); err != nil { 289 return err 290 } 291 // we found the checkpoint we need to load, set value to nil 292 // either the checkpoint was loaded successfully, or it wasn't 293 // if this fails, the node goes down 294 e.loadHash = nil 295 cp, err := cpt.GetCheckpoint() 296 if err != nil { 297 return err 298 } 299 // check the hash 300 if err := cpt.Validate(); err != nil { 301 return err 302 } 303 var ( 304 assets []*types.Asset 305 doneAssets, doneCollat bool // just avoids type asserting all components 306 ) 307 for _, k := range cpOrder { 308 cpData := cp.Get(k) 309 if len(cpData) == 0 { 310 continue 311 } 312 c, ok := e.components[k] 313 if !ok { 314 return ErrUnknownCheckpointName // data cannot be restored 315 } 316 if !doneAssets { 317 if ac, ok := c.(AssetsState); ok { 318 if err := c.Load(ctx, cpData); err != nil { 319 return err 320 } 321 assets = ac.GetEnabledAssets() 322 doneAssets = true 323 continue 324 } 325 } 326 // first enable assets, then load the state 327 if !doneCollat { 328 if cc, ok := c.(CollateralState); ok { 329 for _, a := range assets { 330 // ignore this error, if the asset is already enabled, that's fine 331 // we can carry on as though nothing happened 332 if err := cc.EnableAsset(ctx, *a); err != nil { 333 e.log.Debug("Asset already enabled", 334 logging.String("asset-id", a.ID), 335 logging.Error(err), 336 ) 337 } 338 } 339 doneCollat = true 340 } 341 } 342 if err := c.Load(ctx, cpData); err != nil { 343 return err 344 } 345 } 
346 347 // seems like we went through it all without issue 348 // we can execute the callback 349 e.onCheckpointLoaded(ctx) 350 351 return nil 352 } 353 354 func (e *Engine) ValidateCheckpoint(cpt *types.CheckpointState) error { 355 // if no hash was specified, or the hash doesn't match, then don't even attempt to load the checkpoint 356 if e.loadHash == nil { 357 return ErrNoCheckpointExpectedToBeRestored 358 } 359 if !bytes.Equal(e.loadHash, cpt.Hash) { 360 return fmt.Errorf("received(%v), expected(%v): %w", hex.EncodeToString(cpt.Hash), hex.EncodeToString(e.loadHash), ErrIncompatibleHashes) 361 } 362 return nil 363 } 364 365 func (e *Engine) OnTimeElapsedUpdate(ctx context.Context, d time.Duration) error { 366 if !e.nextCP.IsZero() { 367 // update the time for the next cp 368 e.setNextCP(e.nextCP.Add(-e.delta).Add(d)) 369 } 370 // update delta 371 e.delta = d 372 return nil 373 } 374 375 // onCheckpointLoaded will call the OnCheckpointLoaded method for 376 // all checkpoint providers (if it exists). 377 func (e *Engine) onCheckpointLoaded(ctx context.Context) { 378 if e.onCheckpointLoadedCB != nil { 379 e.onCheckpointLoadedCB(ctx) 380 } 381 } 382 383 func RemoveAll(vegaPaths paths.Paths) error { 384 dbDirectory := vegaPaths.StatePathFor(paths.CheckpointStateHome) 385 386 if err := os.RemoveAll(dbDirectory); err != nil { 387 return fmt.Errorf("an error occurred while removing directory %q: %w", dbDirectory, err) 388 } 389 390 return nil 391 }