code.vegaprotocol.io/vega@v0.79.0/datanode/broker/sqlstore_broker.go (about) 1 // Copyright (C) 2023 Gobalsky Labs Limited 2 // 3 // This program is free software: you can redistribute it and/or modify 4 // it under the terms of the GNU Affero General Public License as 5 // published by the Free Software Foundation, either version 3 of the 6 // License, or (at your option) any later version. 7 // 8 // This program is distributed in the hope that it will be useful, 9 // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 // GNU Affero General Public License for more details. 12 // 13 // You should have received a copy of the GNU Affero General Public License 14 // along with this program. If not, see <http://www.gnu.org/licenses/>. 15 16 package broker 17 18 import ( 19 "context" 20 "fmt" 21 "time" 22 23 "code.vegaprotocol.io/vega/core/events" 24 "code.vegaprotocol.io/vega/datanode/entities" 25 "code.vegaprotocol.io/vega/datanode/metrics" 26 "code.vegaprotocol.io/vega/logging" 27 28 "github.com/pkg/errors" 29 ) 30 31 type SQLBrokerSubscriber interface { 32 SetVegaTime(vegaTime time.Time) 33 Flush(ctx context.Context) error 34 Push(ctx context.Context, val events.Event) error 35 Types() []events.Type 36 Name() string 37 } 38 39 type SQLStoreEventBroker interface { 40 Receive(ctx context.Context) error 41 } 42 43 type TransactionManager interface { 44 WithConnection(ctx context.Context) (context.Context, error) 45 WithTransaction(ctx context.Context) (context.Context, error) 46 Commit(ctx context.Context) error 47 Rollback(ctx context.Context) error 48 RefreshMaterializedViews(ctx context.Context) error 49 } 50 51 type BlockStore interface { 52 Add(ctx context.Context, b entities.Block) error 53 GetLastBlock(ctx context.Context) (entities.Block, error) 54 } 55 56 type ProtocolUpgradeHandler interface { 57 OnProtocolUpgradeEvent(ctx context.Context, chainID string, lastCommittedBlockHeight int64) error 58 GetProtocolUpgradeStarted() bool 59 } 60 61 const ( 62 slowTimeUpdateThreshold = 2 * time.Second 63 ) 64 65 // SQLStoreBroker : push events to each subscriber with a single go routine across all types. 66 type SQLStoreBroker struct { 67 config Config 68 log *logging.Logger 69 subscribers []SQLBrokerSubscriber 70 typeToSubs map[events.Type][]SQLBrokerSubscriber 71 eventSource EventReceiver 72 transactionManager TransactionManager 73 blockStore BlockStore 74 onBlockCommitted func(ctx context.Context, chainId string, lastCommittedBlockHeight int64, snapshotTaken bool) 75 protocolUpdateHandler ProtocolUpgradeHandler 76 chainID string 77 lastBlock *entities.Block 78 slowTimeUpdateTicker *time.Ticker 79 receivedProtocolUpgradeEvent bool 80 snapshotTaken bool 81 } 82 83 func NewSQLStoreBroker( 84 log *logging.Logger, 85 config Config, 86 chainID string, 87 eventsource EventReceiver, 88 transactionManager TransactionManager, 89 blockStore BlockStore, 90 onBlockCommitted func(ctx context.Context, chainId string, lastCommittedBlockHeight int64, snapshotTaken bool), 91 protocolUpdateHandler ProtocolUpgradeHandler, 92 subs []SQLBrokerSubscriber, 93 ) *SQLStoreBroker { 94 b := &SQLStoreBroker{ 95 config: config, 96 log: log.Named("sqlstore-broker"), 97 subscribers: subs, 98 typeToSubs: map[events.Type][]SQLBrokerSubscriber{}, 99 eventSource: eventsource, 100 transactionManager: transactionManager, 101 blockStore: blockStore, 102 chainID: chainID, 103 onBlockCommitted: onBlockCommitted, 104 protocolUpdateHandler: protocolUpdateHandler, 105 slowTimeUpdateTicker: time.NewTicker(slowTimeUpdateThreshold), 106 } 107 108 for _, s := range subs { 109 for _, evtType := range s.Types() { 110 b.typeToSubs[evtType] = append(b.typeToSubs[evtType], s) 111 } 112 } 113 114 return b 115 } 116 117 func (b *SQLStoreBroker) Receive(ctx context.Context) error { 118 if err := b.eventSource.Listen(); err != nil { 119 return err 120 } 121 122 receiveCh, errCh := b.eventSource.Receive(ctx) 123 124 nextBlock, err := b.waitForFirstBlock(ctx, errCh, receiveCh) 125 if err != nil { 126 return err 127 } 128 129 dbContext, err := b.transactionManager.WithConnection(ctx) 130 if err != nil { 131 return err 132 } 133 134 for { 135 if nextBlock, err = b.processBlock(ctx, dbContext, nextBlock, receiveCh, errCh); err != nil { 136 return err 137 } 138 139 b.onBlockCommitted(ctx, b.chainID, b.lastBlock.Height, b.snapshotTaken) 140 141 if b.receivedProtocolUpgradeEvent { 142 return b.protocolUpdateHandler.OnProtocolUpgradeEvent(ctx, b.chainID, b.lastBlock.Height) 143 } 144 } 145 } 146 147 // waitForFirstBlock processes all events until a new block is encountered and returns the new block. A 'new' block is one for which 148 // events have not already been processed by this datanode. 149 func (b *SQLStoreBroker) waitForFirstBlock(ctx context.Context, errCh <-chan error, receiveCh <-chan events.Event) (*entities.Block, error) { 150 lastProcessedBlock, err := b.blockStore.GetLastBlock(ctx) 151 152 if err == nil { 153 b.log.Infof("waiting for first unprocessed block, last processed block height: %d", lastProcessedBlock.Height) 154 } else if errors.Is(err, entities.ErrNotFound) { 155 lastProcessedBlock = entities.Block{ 156 VegaTime: time.Time{}, 157 // TODO: This is making the assumption that the first block will be height 1. This is not necessarily true. 158 // The node can start at any time given to Tendermint through the genesis file. 159 Height: 0, 160 Hash: nil, 161 } 162 } else { 163 return nil, err 164 } 165 166 var beginBlock entities.BeginBlockEvent 167 for { 168 select { 169 case <-ctx.Done(): 170 return nil, ctx.Err() 171 case err = <-errCh: 172 return nil, err 173 case e := <-receiveCh: 174 if e.Type() == events.BeginBlockEvent { 175 beginBlock = e.(entities.BeginBlockEvent) 176 metrics.EventCounterInc(beginBlock.Type().String()) 177 178 if beginBlock.BlockNr() > lastProcessedBlock.Height+1 { 179 return nil, fmt.Errorf("block height on begin block, %d, is too high, the height of the last processed block is %d", 180 beginBlock.BlockNr(), lastProcessedBlock.Height) 181 } 182 183 if beginBlock.BlockNr() > lastProcessedBlock.Height { 184 b.log.Info("first unprocessed block received, starting block processing") 185 return entities.BlockFromBeginBlock(beginBlock) 186 } 187 } 188 } 189 } 190 } 191 192 // processBlock processes all events in the current block up to the next time update. The next time block is returned when processing of the block is done. 193 func (b *SQLStoreBroker) processBlock(ctx context.Context, dbContext context.Context, block *entities.Block, eventsCh <-chan events.Event, errCh <-chan error) (*entities.Block, error) { 194 metrics.BlockCounterInc() 195 metrics.SetBlockHeight(float64(block.Height)) 196 197 blockTimer := blockTimer{} 198 blockTimer.startTimer() 199 defer func() { 200 blockTimer.stopTimer() 201 metrics.AddBlockHandlingTime(blockTimer.duration) 202 }() 203 204 for _, subscriber := range b.subscribers { 205 subscriber.SetVegaTime(block.VegaTime) 206 } 207 208 // Don't use our parent context as a parent of the database operation; if we get cancelled 209 // by e.g. a shutdown request then let the last database operation complete. 210 var blockCtx context.Context 211 var cancel context.CancelFunc 212 blockCtx, cancel = context.WithCancel(dbContext) 213 defer cancel() 214 215 blockCtx, err := b.transactionManager.WithTransaction(blockCtx) 216 defer b.transactionManager.Rollback(blockCtx) 217 218 if err != nil { 219 return nil, fmt.Errorf("failed to add transaction to context:%w", err) 220 } 221 222 if err = b.addBlock(blockCtx, block); err != nil { 223 return nil, fmt.Errorf("failed to add block:%w", err) 224 } 225 226 defer b.slowTimeUpdateTicker.Stop() 227 b.snapshotTaken = false 228 betweenBlocks := false 229 refreshMaterializedViews := false 230 for { 231 blockTimer.stopTimer() 232 select { 233 case <-ctx.Done(): 234 return nil, ctx.Err() 235 case err = <-errCh: 236 return nil, err 237 case <-b.slowTimeUpdateTicker.C: 238 b.log.Warningf("slow time update detected, time between checks %v, block height: %d, total block processing time: %v", slowTimeUpdateThreshold, 239 block.Height, blockTimer.duration) 240 case e := <-eventsCh: 241 if b.protocolUpdateHandler.GetProtocolUpgradeStarted() { 242 return nil, errors.New("received event after protocol upgrade started") 243 } 244 245 if b.config.Level.Level == logging.DebugLevel { 246 b.log.Debug("received event", logging.String("type", e.Type().String()), logging.String("trace-id", e.TraceID())) 247 } 248 metrics.EventCounterInc(e.Type().String()) 249 blockTimer.startTimer() 250 251 switch e.Type() { 252 case events.EndBlockEvent: 253 err = b.flushAllSubscribers(blockCtx) 254 if err != nil { 255 return nil, err 256 } 257 258 err = b.transactionManager.Commit(blockCtx) 259 if err != nil { 260 return nil, fmt.Errorf("failed to commit transactional context:%w", err) 261 } 262 b.slowTimeUpdateTicker.Reset(slowTimeUpdateThreshold) 263 betweenBlocks = true 264 265 if err = b.handleEvent(blockCtx, e); err != nil { 266 return nil, err 267 } 268 269 // at the end of the block, if we have had an epoch event in that block then we should have received 270 // statistics that were updated and reported only at the end of an epoch. The refreshMaterialized flag 271 // should have been set by the EpochUpdate event before this EndBlockEvent was received 272 // so we need to call the refresh materialized views function here. 273 if refreshMaterializedViews { 274 // We need to refresh the materialized views as we have reached the end of an epoch 275 err = b.transactionManager.RefreshMaterializedViews(blockCtx) 276 if err != nil { 277 return nil, fmt.Errorf("failed to refresh materialized views:%w", err) 278 } 279 refreshMaterializedViews = false 280 } 281 282 case events.BeginBlockEvent: 283 beginBlock := e.(entities.BeginBlockEvent) 284 return entities.BlockFromBeginBlock(beginBlock) 285 case events.CoreSnapshotEvent: 286 // if a snapshot is taken on a protocol upgrade block, we want it to be taken synchronously as part of handling of protocol upgrade 287 b.snapshotTaken = !e.StreamMessage().GetCoreSnapshotEvent().ProtocolUpgradeBlock 288 if err = b.handleEvent(blockCtx, e); err != nil { 289 return nil, err 290 } 291 case events.ProtocolUpgradeStartedEvent: 292 b.receivedProtocolUpgradeEvent = true 293 // we've received a protocol upgrade event which is the last event core will have sent out 294 // so we can leave now 295 return nil, nil 296 case events.EpochUpdate: 297 // We have received an epoch event in this block, so we set a flag that will indicate that we should 298 // refresh any materialized views that need to be refreshed after receiving data that is only sent 299 // once an epoch. 300 refreshMaterializedViews = true 301 // We want the default block to execute after we have done this so we fall through to the default case 302 // DANGER WILL ROBINSON!!! Make sure you don't add any code here that will prevent the fallthrough 303 // or add another case statement that will prevent the fallthrough to the default case 304 fallthrough 305 default: 306 if betweenBlocks { 307 // we should only be receiving a BeginBlockEvent immediately after an EndBlockEvent 308 panic(fmt.Errorf("received event %s between end block and begin block", e.Type().String())) 309 } 310 if err = b.handleEvent(blockCtx, e); err != nil { 311 return nil, err 312 } 313 } 314 } 315 } 316 } 317 318 func (b *SQLStoreBroker) flushAllSubscribers(blockCtx context.Context) error { 319 for _, subscriber := range b.subscribers { 320 timer := metrics.NewTimeCounter(subscriber.Name()) 321 err := subscriber.Flush(blockCtx) 322 timer.FlushTimeCounterAdd() 323 if err != nil { 324 return fmt.Errorf("failed to flush subscriber:%w", err) 325 } 326 } 327 return nil 328 } 329 330 func (b *SQLStoreBroker) addBlock(ctx context.Context, block *entities.Block) error { 331 // At startup we get time updates that have the same time to microsecond precision which causes 332 // a primary key restraint failure, this code is to handle this scenario 333 if b.lastBlock == nil || !block.VegaTime.Equal(b.lastBlock.VegaTime) { 334 b.lastBlock = block 335 err := b.blockStore.Add(ctx, *block) 336 if err != nil { 337 return errors.Wrap(err, "failed to add block") 338 } 339 } 340 341 return nil 342 } 343 344 func (b *SQLStoreBroker) handleEvent(ctx context.Context, e events.Event) error { 345 if err := checkChainID(b.chainID, e.ChainID()); err != nil { 346 return err 347 } 348 349 if subs, ok := b.typeToSubs[e.Type()]; ok { 350 for _, sub := range subs { 351 if err := b.push(ctx, sub, e); err != nil { 352 return err 353 } 354 } 355 } 356 357 return nil 358 } 359 360 func (b *SQLStoreBroker) push(ctx context.Context, sub SQLBrokerSubscriber, e events.Event) error { 361 timer := metrics.NewTimeCounter("sql", sub.Name(), e.Type().String()) 362 err := sub.Push(ctx, e) 363 timer.EventTimeCounterAdd() 364 365 if err != nil { 366 errMsg := fmt.Sprintf("failed to process event %v error:%+v", e.StreamMessage(), err) 367 b.log.Error(errMsg) 368 if b.config.PanicOnError { 369 return err 370 } 371 } 372 373 return nil 374 } 375 376 type blockTimer struct { 377 duration time.Duration 378 startTime *time.Time 379 } 380 381 func (t *blockTimer) startTimer() { 382 now := time.Now() 383 t.startTime = &now 384 } 385 386 func (t *blockTimer) stopTimer() { 387 if t.startTime != nil { 388 t.duration = t.duration + time.Since(*t.startTime) 389 t.startTime = nil 390 } 391 }