code.vegaprotocol.io/vega@v0.79.0/datanode/broker/sqlstore_broker.go (about)

     1  // Copyright (C) 2023 Gobalsky Labs Limited
     2  //
     3  // This program is free software: you can redistribute it and/or modify
     4  // it under the terms of the GNU Affero General Public License as
     5  // published by the Free Software Foundation, either version 3 of the
     6  // License, or (at your option) any later version.
     7  //
     8  // This program is distributed in the hope that it will be useful,
     9  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    10  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11  // GNU Affero General Public License for more details.
    12  //
    13  // You should have received a copy of the GNU Affero General Public License
    14  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15  
    16  package broker
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"time"
    22  
    23  	"code.vegaprotocol.io/vega/core/events"
    24  	"code.vegaprotocol.io/vega/datanode/entities"
    25  	"code.vegaprotocol.io/vega/datanode/metrics"
    26  	"code.vegaprotocol.io/vega/logging"
    27  
    28  	"github.com/pkg/errors"
    29  )
    30  
    31  type SQLBrokerSubscriber interface {
    32  	SetVegaTime(vegaTime time.Time)
    33  	Flush(ctx context.Context) error
    34  	Push(ctx context.Context, val events.Event) error
    35  	Types() []events.Type
    36  	Name() string
    37  }
    38  
    39  type SQLStoreEventBroker interface {
    40  	Receive(ctx context.Context) error
    41  }
    42  
    43  type TransactionManager interface {
    44  	WithConnection(ctx context.Context) (context.Context, error)
    45  	WithTransaction(ctx context.Context) (context.Context, error)
    46  	Commit(ctx context.Context) error
    47  	Rollback(ctx context.Context) error
    48  	RefreshMaterializedViews(ctx context.Context) error
    49  }
    50  
    51  type BlockStore interface {
    52  	Add(ctx context.Context, b entities.Block) error
    53  	GetLastBlock(ctx context.Context) (entities.Block, error)
    54  }
    55  
    56  type ProtocolUpgradeHandler interface {
    57  	OnProtocolUpgradeEvent(ctx context.Context, chainID string, lastCommittedBlockHeight int64) error
    58  	GetProtocolUpgradeStarted() bool
    59  }
    60  
    61  const (
    62  	slowTimeUpdateThreshold = 2 * time.Second
    63  )
    64  
    65  // SQLStoreBroker : push events to each subscriber with a single go routine across all types.
    66  type SQLStoreBroker struct {
    67  	config                       Config
    68  	log                          *logging.Logger
    69  	subscribers                  []SQLBrokerSubscriber
    70  	typeToSubs                   map[events.Type][]SQLBrokerSubscriber
    71  	eventSource                  EventReceiver
    72  	transactionManager           TransactionManager
    73  	blockStore                   BlockStore
    74  	onBlockCommitted             func(ctx context.Context, chainId string, lastCommittedBlockHeight int64, snapshotTaken bool)
    75  	protocolUpdateHandler        ProtocolUpgradeHandler
    76  	chainID                      string
    77  	lastBlock                    *entities.Block
    78  	slowTimeUpdateTicker         *time.Ticker
    79  	receivedProtocolUpgradeEvent bool
    80  	snapshotTaken                bool
    81  }
    82  
    83  func NewSQLStoreBroker(
    84  	log *logging.Logger,
    85  	config Config,
    86  	chainID string,
    87  	eventsource EventReceiver,
    88  	transactionManager TransactionManager,
    89  	blockStore BlockStore,
    90  	onBlockCommitted func(ctx context.Context, chainId string, lastCommittedBlockHeight int64, snapshotTaken bool),
    91  	protocolUpdateHandler ProtocolUpgradeHandler,
    92  	subs []SQLBrokerSubscriber,
    93  ) *SQLStoreBroker {
    94  	b := &SQLStoreBroker{
    95  		config:                config,
    96  		log:                   log.Named("sqlstore-broker"),
    97  		subscribers:           subs,
    98  		typeToSubs:            map[events.Type][]SQLBrokerSubscriber{},
    99  		eventSource:           eventsource,
   100  		transactionManager:    transactionManager,
   101  		blockStore:            blockStore,
   102  		chainID:               chainID,
   103  		onBlockCommitted:      onBlockCommitted,
   104  		protocolUpdateHandler: protocolUpdateHandler,
   105  		slowTimeUpdateTicker:  time.NewTicker(slowTimeUpdateThreshold),
   106  	}
   107  
   108  	for _, s := range subs {
   109  		for _, evtType := range s.Types() {
   110  			b.typeToSubs[evtType] = append(b.typeToSubs[evtType], s)
   111  		}
   112  	}
   113  
   114  	return b
   115  }
   116  
   117  func (b *SQLStoreBroker) Receive(ctx context.Context) error {
   118  	if err := b.eventSource.Listen(); err != nil {
   119  		return err
   120  	}
   121  
   122  	receiveCh, errCh := b.eventSource.Receive(ctx)
   123  
   124  	nextBlock, err := b.waitForFirstBlock(ctx, errCh, receiveCh)
   125  	if err != nil {
   126  		return err
   127  	}
   128  
   129  	dbContext, err := b.transactionManager.WithConnection(ctx)
   130  	if err != nil {
   131  		return err
   132  	}
   133  
   134  	for {
   135  		if nextBlock, err = b.processBlock(ctx, dbContext, nextBlock, receiveCh, errCh); err != nil {
   136  			return err
   137  		}
   138  
   139  		b.onBlockCommitted(ctx, b.chainID, b.lastBlock.Height, b.snapshotTaken)
   140  
   141  		if b.receivedProtocolUpgradeEvent {
   142  			return b.protocolUpdateHandler.OnProtocolUpgradeEvent(ctx, b.chainID, b.lastBlock.Height)
   143  		}
   144  	}
   145  }
   146  
   147  // waitForFirstBlock processes all events until a new block is encountered and returns the new block. A 'new' block is one for which
   148  // events have not already been processed by this datanode.
   149  func (b *SQLStoreBroker) waitForFirstBlock(ctx context.Context, errCh <-chan error, receiveCh <-chan events.Event) (*entities.Block, error) {
   150  	lastProcessedBlock, err := b.blockStore.GetLastBlock(ctx)
   151  
   152  	if err == nil {
   153  		b.log.Infof("waiting for first unprocessed block, last processed block height: %d", lastProcessedBlock.Height)
   154  	} else if errors.Is(err, entities.ErrNotFound) {
   155  		lastProcessedBlock = entities.Block{
   156  			VegaTime: time.Time{},
   157  			// TODO: This is making the assumption that the first block will be height 1. This is not necessarily true.
   158  			//       The node can start at any time given to Tendermint through the genesis file.
   159  			Height: 0,
   160  			Hash:   nil,
   161  		}
   162  	} else {
   163  		return nil, err
   164  	}
   165  
   166  	var beginBlock entities.BeginBlockEvent
   167  	for {
   168  		select {
   169  		case <-ctx.Done():
   170  			return nil, ctx.Err()
   171  		case err = <-errCh:
   172  			return nil, err
   173  		case e := <-receiveCh:
   174  			if e.Type() == events.BeginBlockEvent {
   175  				beginBlock = e.(entities.BeginBlockEvent)
   176  				metrics.EventCounterInc(beginBlock.Type().String())
   177  
   178  				if beginBlock.BlockNr() > lastProcessedBlock.Height+1 {
   179  					return nil, fmt.Errorf("block height on begin block, %d, is too high, the height of the last processed block is %d",
   180  						beginBlock.BlockNr(), lastProcessedBlock.Height)
   181  				}
   182  
   183  				if beginBlock.BlockNr() > lastProcessedBlock.Height {
   184  					b.log.Info("first unprocessed block received, starting block processing")
   185  					return entities.BlockFromBeginBlock(beginBlock)
   186  				}
   187  			}
   188  		}
   189  	}
   190  }
   191  
   192  // processBlock processes all events in the current block up to the next time update.  The next time block is returned when processing of the block is done.
   193  func (b *SQLStoreBroker) processBlock(ctx context.Context, dbContext context.Context, block *entities.Block, eventsCh <-chan events.Event, errCh <-chan error) (*entities.Block, error) {
   194  	metrics.BlockCounterInc()
   195  	metrics.SetBlockHeight(float64(block.Height))
   196  
   197  	blockTimer := blockTimer{}
   198  	blockTimer.startTimer()
   199  	defer func() {
   200  		blockTimer.stopTimer()
   201  		metrics.AddBlockHandlingTime(blockTimer.duration)
   202  	}()
   203  
   204  	for _, subscriber := range b.subscribers {
   205  		subscriber.SetVegaTime(block.VegaTime)
   206  	}
   207  
   208  	// Don't use our parent context as a parent of the database operation; if we get cancelled
   209  	// by e.g. a shutdown request then let the last database operation complete.
   210  	var blockCtx context.Context
   211  	var cancel context.CancelFunc
   212  	blockCtx, cancel = context.WithCancel(dbContext)
   213  	defer cancel()
   214  
   215  	blockCtx, err := b.transactionManager.WithTransaction(blockCtx)
   216  	defer b.transactionManager.Rollback(blockCtx)
   217  
   218  	if err != nil {
   219  		return nil, fmt.Errorf("failed to add transaction to context:%w", err)
   220  	}
   221  
   222  	if err = b.addBlock(blockCtx, block); err != nil {
   223  		return nil, fmt.Errorf("failed to add block:%w", err)
   224  	}
   225  
   226  	defer b.slowTimeUpdateTicker.Stop()
   227  	b.snapshotTaken = false
   228  	betweenBlocks := false
   229  	refreshMaterializedViews := false
   230  	for {
   231  		blockTimer.stopTimer()
   232  		select {
   233  		case <-ctx.Done():
   234  			return nil, ctx.Err()
   235  		case err = <-errCh:
   236  			return nil, err
   237  		case <-b.slowTimeUpdateTicker.C:
   238  			b.log.Warningf("slow time update detected, time between checks %v, block height: %d, total block processing time: %v", slowTimeUpdateThreshold,
   239  				block.Height, blockTimer.duration)
   240  		case e := <-eventsCh:
   241  			if b.protocolUpdateHandler.GetProtocolUpgradeStarted() {
   242  				return nil, errors.New("received event after protocol upgrade started")
   243  			}
   244  
   245  			if b.config.Level.Level == logging.DebugLevel {
   246  				b.log.Debug("received event", logging.String("type", e.Type().String()), logging.String("trace-id", e.TraceID()))
   247  			}
   248  			metrics.EventCounterInc(e.Type().String())
   249  			blockTimer.startTimer()
   250  
   251  			switch e.Type() {
   252  			case events.EndBlockEvent:
   253  				err = b.flushAllSubscribers(blockCtx)
   254  				if err != nil {
   255  					return nil, err
   256  				}
   257  
   258  				err = b.transactionManager.Commit(blockCtx)
   259  				if err != nil {
   260  					return nil, fmt.Errorf("failed to commit transactional context:%w", err)
   261  				}
   262  				b.slowTimeUpdateTicker.Reset(slowTimeUpdateThreshold)
   263  				betweenBlocks = true
   264  
   265  				if err = b.handleEvent(blockCtx, e); err != nil {
   266  					return nil, err
   267  				}
   268  
   269  				// at the end of the block, if we have had an epoch event in that block then we should have received
   270  				// statistics that were updated and reported only at the end of an epoch. The refreshMaterialized flag
   271  				// should have been set by the EpochUpdate event before this EndBlockEvent was received
   272  				// so we need to call the refresh materialized views function here.
   273  				if refreshMaterializedViews {
   274  					// We need to refresh the materialized views as we have reached the end of an epoch
   275  					err = b.transactionManager.RefreshMaterializedViews(blockCtx)
   276  					if err != nil {
   277  						return nil, fmt.Errorf("failed to refresh materialized views:%w", err)
   278  					}
   279  					refreshMaterializedViews = false
   280  				}
   281  
   282  			case events.BeginBlockEvent:
   283  				beginBlock := e.(entities.BeginBlockEvent)
   284  				return entities.BlockFromBeginBlock(beginBlock)
   285  			case events.CoreSnapshotEvent:
   286  				// if a snapshot is taken on a protocol upgrade block, we want it to be taken synchronously as part of handling of protocol upgrade
   287  				b.snapshotTaken = !e.StreamMessage().GetCoreSnapshotEvent().ProtocolUpgradeBlock
   288  				if err = b.handleEvent(blockCtx, e); err != nil {
   289  					return nil, err
   290  				}
   291  			case events.ProtocolUpgradeStartedEvent:
   292  				b.receivedProtocolUpgradeEvent = true
   293  				// we've received a protocol upgrade event which is the last event core will have sent out
   294  				// so we can leave now
   295  				return nil, nil
   296  			case events.EpochUpdate:
   297  				// We have received an epoch event in this block, so we set a flag that will indicate that we should
   298  				// refresh any materialized views that need to be refreshed after receiving data that is only sent
   299  				// once an epoch.
   300  				refreshMaterializedViews = true
   301  				// We want the default block to execute after we have done this so we fall through to the default case
   302  				// DANGER WILL ROBINSON!!! Make sure you don't add any code here that will prevent the fallthrough
   303  				// or add another case statement that will prevent the fallthrough to the default case
   304  				fallthrough
   305  			default:
   306  				if betweenBlocks {
   307  					// we should only be receiving a BeginBlockEvent immediately after an EndBlockEvent
   308  					panic(fmt.Errorf("received event %s between end block and begin block", e.Type().String()))
   309  				}
   310  				if err = b.handleEvent(blockCtx, e); err != nil {
   311  					return nil, err
   312  				}
   313  			}
   314  		}
   315  	}
   316  }
   317  
   318  func (b *SQLStoreBroker) flushAllSubscribers(blockCtx context.Context) error {
   319  	for _, subscriber := range b.subscribers {
   320  		timer := metrics.NewTimeCounter(subscriber.Name())
   321  		err := subscriber.Flush(blockCtx)
   322  		timer.FlushTimeCounterAdd()
   323  		if err != nil {
   324  			return fmt.Errorf("failed to flush subscriber:%w", err)
   325  		}
   326  	}
   327  	return nil
   328  }
   329  
   330  func (b *SQLStoreBroker) addBlock(ctx context.Context, block *entities.Block) error {
   331  	// At startup we get time updates that have the same time to microsecond precision which causes
   332  	// a primary key restraint failure, this code is to handle this scenario
   333  	if b.lastBlock == nil || !block.VegaTime.Equal(b.lastBlock.VegaTime) {
   334  		b.lastBlock = block
   335  		err := b.blockStore.Add(ctx, *block)
   336  		if err != nil {
   337  			return errors.Wrap(err, "failed to add block")
   338  		}
   339  	}
   340  
   341  	return nil
   342  }
   343  
   344  func (b *SQLStoreBroker) handleEvent(ctx context.Context, e events.Event) error {
   345  	if err := checkChainID(b.chainID, e.ChainID()); err != nil {
   346  		return err
   347  	}
   348  
   349  	if subs, ok := b.typeToSubs[e.Type()]; ok {
   350  		for _, sub := range subs {
   351  			if err := b.push(ctx, sub, e); err != nil {
   352  				return err
   353  			}
   354  		}
   355  	}
   356  
   357  	return nil
   358  }
   359  
   360  func (b *SQLStoreBroker) push(ctx context.Context, sub SQLBrokerSubscriber, e events.Event) error {
   361  	timer := metrics.NewTimeCounter("sql", sub.Name(), e.Type().String())
   362  	err := sub.Push(ctx, e)
   363  	timer.EventTimeCounterAdd()
   364  
   365  	if err != nil {
   366  		errMsg := fmt.Sprintf("failed to process event %v error:%+v", e.StreamMessage(), err)
   367  		b.log.Error(errMsg)
   368  		if b.config.PanicOnError {
   369  			return err
   370  		}
   371  	}
   372  
   373  	return nil
   374  }
   375  
   376  type blockTimer struct {
   377  	duration  time.Duration
   378  	startTime *time.Time
   379  }
   380  
   381  func (t *blockTimer) startTimer() {
   382  	now := time.Now()
   383  	t.startTime = &now
   384  }
   385  
   386  func (t *blockTimer) stopTimer() {
   387  	if t.startTime != nil {
   388  		t.duration = t.duration + time.Since(*t.startTime)
   389  		t.startTime = nil
   390  	}
   391  }