code.vegaprotocol.io/vega@v0.79.0/cmd/data-node/commands/start/node_pre.go (about)

     1  // Copyright (C) 2023 Gobalsky Labs Limited
     2  //
     3  // This program is free software: you can redistribute it and/or modify
     4  // it under the terms of the GNU Affero General Public License as
     5  // published by the Free Software Foundation, either version 3 of the
     6  // License, or (at your option) any later version.
     7  //
     8  // This program is distributed in the hope that it will be useful,
     9  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    10  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11  // GNU Affero General Public License for more details.
    12  //
    13  // You should have received a copy of the GNU Affero General Public License
    14  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15  
    16  package start
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"os"
    22  	"path/filepath"
    23  	"time"
    24  
    25  	"code.vegaprotocol.io/vega/datanode/broker"
    26  	"code.vegaprotocol.io/vega/datanode/config"
    27  	"code.vegaprotocol.io/vega/datanode/networkhistory"
    28  	"code.vegaprotocol.io/vega/datanode/networkhistory/ipfs"
    29  	"code.vegaprotocol.io/vega/datanode/networkhistory/snapshot"
    30  	"code.vegaprotocol.io/vega/datanode/networkhistory/store"
    31  	"code.vegaprotocol.io/vega/datanode/sqlstore"
    32  	"code.vegaprotocol.io/vega/libs/fs"
    33  	"code.vegaprotocol.io/vega/libs/pprof"
    34  	"code.vegaprotocol.io/vega/libs/subscribers"
    35  	"code.vegaprotocol.io/vega/logging"
    36  	"code.vegaprotocol.io/vega/paths"
    37  	vegaprotoapi "code.vegaprotocol.io/vega/protos/vega/api/v1"
    38  
    39  	"github.com/cenkalti/backoff"
    40  	"google.golang.org/grpc"
    41  	"gopkg.in/natefinch/lumberjack.v2"
    42  )
    43  
    44  func (l *NodeCommand) persistentPre([]string) (err error) {
    45  	// ensure we cancel the context on error
    46  	defer func() {
    47  		if err != nil {
    48  			l.cancel()
    49  		}
    50  	}()
    51  
    52  	conf := l.configWatcher.Get()
    53  
    54  	// reload logger with the setup from configuration
    55  	l.Log = logging.NewLoggerFromConfig(conf.Logging).Named(l.Log.GetName())
    56  
    57  	preLog := l.Log.Named("start.persistentPre")
    58  
    59  	if conf.Pprof.Enabled {
    60  		preLog.Info("vega is starting with pprof profile, this is not a recommended setting for production")
    61  		l.pproffhandlr, err = pprof.New(l.Log, conf.Pprof)
    62  		if err != nil {
    63  			return
    64  		}
    65  		l.configWatcher.OnConfigUpdate(
    66  			func(cfg config.Config) { l.pproffhandlr.ReloadConf(cfg.Pprof) },
    67  		)
    68  	}
    69  
    70  	preLog.Info("Starting Vega Datanode",
    71  		logging.String("version", l.Version),
    72  		logging.String("version-hash", l.VersionHash))
    73  
    74  	if l.conf.SQLStore.UseEmbedded {
    75  		logDir := l.vegaPaths.StatePathFor(paths.DataNodeLogsHome)
    76  		postgresLogger := &lumberjack.Logger{
    77  			Filename: filepath.Join(logDir, "embedded-postgres.log"),
    78  			MaxSize:  l.conf.SQLStore.LogRotationConfig.MaxSize,
    79  			MaxAge:   l.conf.SQLStore.LogRotationConfig.MaxAge,
    80  			Compress: true,
    81  		}
    82  
    83  		runtimeDir := l.vegaPaths.StatePathFor(paths.DataNodeEmbeddedPostgresRuntimeDir)
    84  		l.embeddedPostgres, err = sqlstore.StartEmbeddedPostgres(l.Log, l.conf.SQLStore,
    85  			runtimeDir, postgresLogger)
    86  
    87  		if err != nil {
    88  			return fmt.Errorf("failed to start embedded postgres: %w", err)
    89  		}
    90  
    91  		go func() {
    92  			for range l.ctx.Done() {
    93  				l.embeddedPostgres.Stop()
    94  			}
    95  		}()
    96  	}
    97  
    98  	if l.conf.SQLStore.WipeOnStartup {
    99  		if ResetDatabaseAndNetworkHistory(l.ctx, l.Log, l.vegaPaths, l.conf.SQLStore.ConnectionConfig); err != nil {
   100  			return fmt.Errorf("failed to reset database and network history: %w", err)
   101  		}
   102  	} else if !l.conf.SQLStore.WipeOnStartup && l.conf.NetworkHistory.Enabled {
   103  		ipfsDir := filepath.Join(l.vegaPaths.StatePathFor(paths.DataNodeNetworkHistoryHome), "store", "ipfs")
   104  		ipfsExists, err := fs.PathExists(ipfsDir)
   105  		if err != nil {
   106  			return fmt.Errorf("failed to check if ipfs store is already initialized")
   107  		}
   108  
   109  		// We do not care for migration when the ipfs store does not exist on the local file system
   110  		if ipfsExists {
   111  			preLog.Info("Migrating the IPFS storage to the latest version")
   112  			if err := ipfs.MigrateIpfsStorageVersion(preLog, ipfsDir); err != nil {
   113  				return fmt.Errorf("failed to migrate the ipfs version")
   114  			}
   115  			preLog.Info("Migrating the IPFS storage finished")
   116  		} else {
   117  			preLog.Info("IPFS store not initialized. Migration not needed")
   118  		}
   119  	}
   120  
   121  	initialisedFromNetworkHistory := false
   122  	if l.conf.NetworkHistory.Enabled {
   123  		preLog.Info("Initializing Network History")
   124  
   125  		if l.conf.AutoInitialiseFromNetworkHistory {
   126  			if err := networkhistory.KillAllConnectionsToDatabase(l.ctx, l.conf.SQLStore.ConnectionConfig); err != nil {
   127  				return fmt.Errorf("failed to kill all connections to database: %w", err)
   128  			}
   129  		}
   130  
   131  		err = l.initialiseNetworkHistory(preLog, l.conf.SQLStore.ConnectionConfig)
   132  		if err != nil {
   133  			l.Log.Error("Failed to initialise network history", logging.Error(err))
   134  			return fmt.Errorf("failed to initialise network history:%w", err)
   135  		}
   136  
   137  		if l.conf.AutoInitialiseFromNetworkHistory {
   138  			preLog.Info("Auto Initialising Datanode From Network History")
   139  			apiPorts := []int{l.conf.API.Port}
   140  			apiPorts = append(apiPorts, l.conf.NetworkHistory.Initialise.GrpcAPIPorts...)
   141  
   142  			if err = networkhistory.InitialiseDatanodeFromNetworkHistory(l.ctx, l.conf.NetworkHistory.Initialise,
   143  				preLog, l.conf.SQLStore.ConnectionConfig, l.networkHistoryService, apiPorts,
   144  				bool(l.conf.SQLStore.VerboseMigration)); err != nil {
   145  				return fmt.Errorf("failed to initialize datanode from network history: %w", err)
   146  			}
   147  
   148  			initialisedFromNetworkHistory = true
   149  			preLog.Info("Initialized from network history")
   150  		}
   151  	}
   152  
   153  	if !initialisedFromNetworkHistory {
   154  		operation := func() (opErr error) {
   155  			preLog.Info("Attempting to initialise database...")
   156  			opErr = l.initialiseDatabase(preLog)
   157  			if opErr != nil {
   158  				preLog.Error("Failed to initialise database, retrying...", logging.Error(opErr))
   159  			}
   160  			preLog.Info("Database initialised")
   161  			return opErr
   162  		}
   163  
   164  		retryConfig := l.conf.SQLStore.ConnectionRetryConfig
   165  
   166  		expBackoff := backoff.NewExponentialBackOff()
   167  		expBackoff.InitialInterval = retryConfig.InitialInterval
   168  		expBackoff.MaxInterval = retryConfig.MaxInterval
   169  		expBackoff.MaxElapsedTime = retryConfig.MaxElapsedTime
   170  
   171  		err = backoff.Retry(operation, backoff.WithMaxRetries(expBackoff, retryConfig.MaxRetries))
   172  		if err != nil {
   173  			return fmt.Errorf("failed to connect to database: %w", err)
   174  		}
   175  	}
   176  
   177  	preLog.Info("Applying Data Retention Policies")
   178  
   179  	err = sqlstore.ApplyDataRetentionPolicies(l.conf.SQLStore, preLog)
   180  	if err != nil {
   181  		return fmt.Errorf("failed to apply data retention policies:%w", err)
   182  	}
   183  
   184  	// check that the schema version matches the latest migration, because if it doesn't queries might fail if rows/tables
   185  	// it expects to exist don't
   186  	if err := sqlstore.CheckSchemaVersionsSynced(l.Log, conf.SQLStore.ConnectionConfig, sqlstore.EmbedMigrations); err != nil {
   187  		return err
   188  	}
   189  
   190  	preLog.Info("Enabling SQL stores")
   191  
   192  	l.transactionalConnectionSource, err = sqlstore.NewTransactionalConnectionSource(l.ctx, preLog, l.conf.SQLStore.ConnectionConfig)
   193  	if err != nil {
   194  		return fmt.Errorf("failed to create transactional connection source: %w", err)
   195  	}
   196  
   197  	logSqlstore := l.Log.Named("sqlstore")
   198  	l.CreateAllStores(l.ctx, logSqlstore, l.transactionalConnectionSource, l.conf.CandlesV2.CandleStore)
   199  
   200  	logService := l.Log.Named("service")
   201  	logService.SetLevel(l.conf.Service.Level.Get())
   202  	if err := l.SetupServices(l.ctx, logService, l.conf.Service, l.conf.CandlesV2); err != nil {
   203  		return err
   204  	}
   205  
   206  	err = networkhistory.VerifyChainID(l.conf.ChainID, l.chainService)
   207  	if err != nil {
   208  		return fmt.Errorf("failed to verify chain id:%w", err)
   209  	}
   210  
   211  	l.SetupSQLSubscribers()
   212  
   213  	return nil
   214  }
   215  
   216  func (l *NodeCommand) initialiseDatabase(preLog *logging.Logger) error {
   217  	var err error
   218  	conf := l.conf.SQLStore.ConnectionConfig
   219  	conf.MaxConnPoolSize = 1
   220  	pool, err := sqlstore.CreateConnectionPool(l.ctx, conf)
   221  	if err != nil {
   222  		return fmt.Errorf("failed to create connection pool: %w", err)
   223  	}
   224  	defer pool.Close()
   225  
   226  	hasVegaSchema, err := sqlstore.HasVegaSchema(l.ctx, pool)
   227  	if err != nil {
   228  		return fmt.Errorf("failed to check if database has schema: %w", err)
   229  	}
   230  
   231  	// If it's an empty database, recreate it with correct locale settings
   232  	if !hasVegaSchema {
   233  		err = sqlstore.RecreateVegaDatabase(l.ctx, preLog, l.conf.SQLStore.ConnectionConfig)
   234  		if err != nil {
   235  			return fmt.Errorf("failed to recreate vega schema: %w", err)
   236  		}
   237  	}
   238  
   239  	err = sqlstore.MigrateToLatestSchema(preLog, l.conf.SQLStore)
   240  	if err != nil {
   241  		return fmt.Errorf("failed to migrate to latest schema:%w", err)
   242  	}
   243  
   244  	return nil
   245  }
   246  
   247  // we've already set everything up WRT arguments etc... just bootstrap the node.
   248  func (l *NodeCommand) preRun([]string) (err error) {
   249  	// ensure that context is cancelled if we return an error here
   250  	defer func() {
   251  		if err != nil {
   252  			l.cancel()
   253  		}
   254  	}()
   255  
   256  	preLog := l.Log.Named("start.preRun")
   257  	brokerLog := l.Log.Named("broker")
   258  	eventSourceLog := brokerLog.Named("eventsource")
   259  
   260  	eventReceiverSender, err := broker.NewEventReceiverSender(l.conf.Broker, eventSourceLog, l.conf.ChainID)
   261  	if err != nil {
   262  		preLog.Error("unable to initialise event source", logging.Error(err))
   263  		return err
   264  	}
   265  
   266  	var rawEventSource broker.RawEventReceiver = eventReceiverSender
   267  
   268  	if l.conf.Broker.UseBufferedEventSource {
   269  		bufferFilePath, err := l.vegaPaths.CreateStatePathFor(paths.DataNodeEventBufferHome)
   270  		if err != nil {
   271  			preLog.Error("failed to create path for buffered event source", logging.Error(err))
   272  			return err
   273  		}
   274  
   275  		archiveFilesPath, err := l.vegaPaths.CreateStatePathFor(paths.DataNodeArchivedEventBufferHome)
   276  		if err != nil {
   277  			l.Log.Error("failed to create archive path for buffered event source", logging.Error(err))
   278  			return err
   279  		}
   280  
   281  		rawEventSource, err = broker.NewBufferedEventSource(l.ctx, l.Log, l.conf.Broker.BufferedEventSourceConfig, eventReceiverSender,
   282  			bufferFilePath, archiveFilesPath)
   283  		if err != nil {
   284  			preLog.Error("unable to initialise file buffered event source", logging.Error(err))
   285  			return err
   286  		}
   287  	}
   288  
   289  	var eventSource broker.EventReceiver
   290  	eventSource = broker.NewDeserializer(rawEventSource)
   291  	eventSource = broker.NewFanOutEventSource(eventSource, l.conf.SQLStore.FanOutBufferSize, 2)
   292  
   293  	var onBlockCommittedHandler func(ctx context.Context, chainId string, lastCommittedBlockHeight int64, snapshotTaken bool)
   294  	var protocolUpgradeHandler broker.ProtocolUpgradeHandler
   295  
   296  	if l.conf.NetworkHistory.Enabled {
   297  		blockCommitHandler := networkhistory.NewBlockCommitHandler(l.Log, l.conf.NetworkHistory, l.snapshotService.SnapshotData,
   298  			bool(l.conf.Broker.UseEventFile), l.conf.Broker.FileEventSourceConfig.TimeBetweenBlocks.Duration,
   299  			5*time.Second, 6)
   300  		onBlockCommittedHandler = blockCommitHandler.OnBlockCommitted
   301  		protocolUpgradeHandler = networkhistory.NewProtocolUpgradeHandler(l.Log, l.protocolUpgradeService, eventReceiverSender,
   302  			l.networkHistoryService.CreateAndPublishSegment)
   303  	} else {
   304  		onBlockCommittedHandler = func(ctx context.Context, chainId string, lastCommittedBlockHeight int64, snapshotTaken bool) {}
   305  		protocolUpgradeHandler = networkhistory.NewProtocolUpgradeHandler(l.Log, l.protocolUpgradeService, eventReceiverSender,
   306  			func(ctx context.Context, chainID string, toHeight int64) error { return nil })
   307  	}
   308  
   309  	l.sqlBroker = broker.NewSQLStoreBroker(l.Log, l.conf.Broker, l.conf.ChainID, eventSource,
   310  		l.transactionalConnectionSource,
   311  		l.blockStore,
   312  		onBlockCommittedHandler,
   313  		protocolUpgradeHandler,
   314  		l.GetSQLSubscribers(),
   315  	)
   316  
   317  	l.broker, err = broker.New(l.ctx, brokerLog, l.conf.Broker, l.conf.ChainID, eventSource)
   318  	if err != nil {
   319  		preLog.Error("unable to initialise broker", logging.Error(err))
   320  		return err
   321  	}
   322  
   323  	// Event service as used by old and new world
   324  	l.eventService = subscribers.NewService(preLog, l.broker, l.conf.Broker.EventBusClientBufferSize)
   325  
   326  	nodeAddr := fmt.Sprintf("%v:%v", l.conf.API.CoreNodeIP, l.conf.API.CoreNodeGRPCPort)
   327  	conn, err := grpc.Dial(nodeAddr, grpc.WithInsecure())
   328  	if err != nil {
   329  		return err
   330  	}
   331  
   332  	l.vegaCoreServiceClient = vegaprotoapi.NewCoreServiceClient(conn)
   333  	return nil
   334  }
   335  
   336  func (l *NodeCommand) initialiseNetworkHistory(preLog *logging.Logger, connConfig sqlstore.ConnectionConfig) error {
   337  	// Want to pre-allocate some connections to ensure a connection is always available,
   338  	// 3 is chosen to allow for the fact that pool size can temporarily drop below the min pool size.
   339  	connConfig.MaxConnPoolSize = 3
   340  	connConfig.MinConnPoolSize = 3
   341  
   342  	networkHistoryPool, err := sqlstore.CreateConnectionPool(l.ctx, connConfig)
   343  	if err != nil {
   344  		return fmt.Errorf("failed to create network history connection pool: %w", err)
   345  	}
   346  
   347  	preNetworkHistoryLog := preLog.Named("networkHistory")
   348  	networkHistoryLog := l.Log.Named("networkHistory")
   349  	networkHistoryLog.SetLevel(l.conf.NetworkHistory.Level.Get())
   350  
   351  	snapshotServiceLog := networkHistoryLog.Named("snapshot")
   352  	networkHistoryServiceLog := networkHistoryLog.Named("service")
   353  	home := l.vegaPaths.StatePathFor(paths.DataNodeNetworkHistoryHome)
   354  
   355  	networkHistoryStore, err := store.New(l.ctx, networkHistoryServiceLog, l.conf.ChainID, l.conf.NetworkHistory.Store, home,
   356  		l.conf.MaxMemoryPercent)
   357  	if err != nil {
   358  		return fmt.Errorf("failed to create network history store: %w", err)
   359  	}
   360  
   361  	l.snapshotService, err = snapshot.NewSnapshotService(snapshotServiceLog, l.conf.NetworkHistory.Snapshot,
   362  		networkHistoryPool, networkHistoryStore,
   363  		l.vegaPaths.StatePathFor(paths.DataNodeNetworkHistorySnapshotCopyTo), func(version int64) error {
   364  			if err = sqlstore.MigrateUpToSchemaVersion(preNetworkHistoryLog, l.conf.SQLStore, version, sqlstore.EmbedMigrations); err != nil {
   365  				return fmt.Errorf("failed to migrate up to schema version %d: %w", version, err)
   366  			}
   367  			return nil
   368  		},
   369  		func(version int64) error {
   370  			if err = sqlstore.MigrateDownToSchemaVersion(preNetworkHistoryLog, l.conf.SQLStore, version, sqlstore.EmbedMigrations); err != nil {
   371  				return fmt.Errorf("failed to migrate down to schema version %d: %w", version, err)
   372  			}
   373  			return nil
   374  		})
   375  	if err != nil {
   376  		return fmt.Errorf("failed to create snapshot service:%w", err)
   377  	}
   378  
   379  	l.networkHistoryService, err = networkhistory.New(l.ctx, networkHistoryServiceLog, l.conf.ChainID, l.conf.NetworkHistory,
   380  		networkHistoryPool,
   381  		l.snapshotService,
   382  		networkHistoryStore,
   383  		l.conf.API.Port,
   384  		l.vegaPaths.StatePathFor(paths.DataNodeNetworkHistorySnapshotCopyTo))
   385  	if err != nil {
   386  		return fmt.Errorf("failed to create networkHistory service:%w", err)
   387  	}
   388  
   389  	return nil
   390  }
   391  
   392  func ResetDatabaseAndNetworkHistory(ctx context.Context, log *logging.Logger, vegaPaths paths.Paths, connConfig sqlstore.ConnectionConfig) error {
   393  	err := os.RemoveAll(vegaPaths.StatePathFor(paths.DataNodeNetworkHistoryHome))
   394  	if err != nil {
   395  		return fmt.Errorf("failed to remove network history dir: %w", err)
   396  	}
   397  
   398  	log.Info("Wiped all network history")
   399  
   400  	if err := sqlstore.RecreateVegaDatabase(ctx, log, connConfig); err != nil {
   401  		return fmt.Errorf("failed to wipe database:%w", err)
   402  	}
   403  	log.Info("Wiped all existing data from the database")
   404  	return nil
   405  }