code.vegaprotocol.io/vega@v0.79.0/cmd/data-node/commands/start/node_pre.go (about) 1 // Copyright (C) 2023 Gobalsky Labs Limited 2 // 3 // This program is free software: you can redistribute it and/or modify 4 // it under the terms of the GNU Affero General Public License as 5 // published by the Free Software Foundation, either version 3 of the 6 // License, or (at your option) any later version. 7 // 8 // This program is distributed in the hope that it will be useful, 9 // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 // GNU Affero General Public License for more details. 12 // 13 // You should have received a copy of the GNU Affero General Public License 14 // along with this program. If not, see <http://www.gnu.org/licenses/>. 15 16 package start 17 18 import ( 19 "context" 20 "fmt" 21 "os" 22 "path/filepath" 23 "time" 24 25 "code.vegaprotocol.io/vega/datanode/broker" 26 "code.vegaprotocol.io/vega/datanode/config" 27 "code.vegaprotocol.io/vega/datanode/networkhistory" 28 "code.vegaprotocol.io/vega/datanode/networkhistory/ipfs" 29 "code.vegaprotocol.io/vega/datanode/networkhistory/snapshot" 30 "code.vegaprotocol.io/vega/datanode/networkhistory/store" 31 "code.vegaprotocol.io/vega/datanode/sqlstore" 32 "code.vegaprotocol.io/vega/libs/fs" 33 "code.vegaprotocol.io/vega/libs/pprof" 34 "code.vegaprotocol.io/vega/libs/subscribers" 35 "code.vegaprotocol.io/vega/logging" 36 "code.vegaprotocol.io/vega/paths" 37 vegaprotoapi "code.vegaprotocol.io/vega/protos/vega/api/v1" 38 39 "github.com/cenkalti/backoff" 40 "google.golang.org/grpc" 41 "gopkg.in/natefinch/lumberjack.v2" 42 ) 43 44 func (l *NodeCommand) persistentPre([]string) (err error) { 45 // ensure we cancel the context on error 46 defer func() { 47 if err != nil { 48 l.cancel() 49 } 50 }() 51 52 conf := l.configWatcher.Get() 53 54 // reload logger with the setup from configuration 55 l.Log = logging.NewLoggerFromConfig(conf.Logging).Named(l.Log.GetName()) 56 57 preLog := l.Log.Named("start.persistentPre") 58 59 if conf.Pprof.Enabled { 60 preLog.Info("vega is starting with pprof profile, this is not a recommended setting for production") 61 l.pproffhandlr, err = pprof.New(l.Log, conf.Pprof) 62 if err != nil { 63 return 64 } 65 l.configWatcher.OnConfigUpdate( 66 func(cfg config.Config) { l.pproffhandlr.ReloadConf(cfg.Pprof) }, 67 ) 68 } 69 70 preLog.Info("Starting Vega Datanode", 71 logging.String("version", l.Version), 72 logging.String("version-hash", l.VersionHash)) 73 74 if l.conf.SQLStore.UseEmbedded { 75 logDir := l.vegaPaths.StatePathFor(paths.DataNodeLogsHome) 76 postgresLogger := &lumberjack.Logger{ 77 Filename: filepath.Join(logDir, "embedded-postgres.log"), 78 MaxSize: l.conf.SQLStore.LogRotationConfig.MaxSize, 79 MaxAge: l.conf.SQLStore.LogRotationConfig.MaxAge, 80 Compress: true, 81 } 82 83 runtimeDir := l.vegaPaths.StatePathFor(paths.DataNodeEmbeddedPostgresRuntimeDir) 84 l.embeddedPostgres, err = sqlstore.StartEmbeddedPostgres(l.Log, l.conf.SQLStore, 85 runtimeDir, postgresLogger) 86 87 if err != nil { 88 return fmt.Errorf("failed to start embedded postgres: %w", err) 89 } 90 91 go func() { 92 for range l.ctx.Done() { 93 l.embeddedPostgres.Stop() 94 } 95 }() 96 } 97 98 if l.conf.SQLStore.WipeOnStartup { 99 if ResetDatabaseAndNetworkHistory(l.ctx, l.Log, l.vegaPaths, l.conf.SQLStore.ConnectionConfig); err != nil { 100 return fmt.Errorf("failed to reset database and network history: %w", err) 101 } 102 } else if !l.conf.SQLStore.WipeOnStartup && l.conf.NetworkHistory.Enabled { 103 ipfsDir := filepath.Join(l.vegaPaths.StatePathFor(paths.DataNodeNetworkHistoryHome), "store", "ipfs") 104 ipfsExists, err := fs.PathExists(ipfsDir) 105 if err != nil { 106 return fmt.Errorf("failed to check if ipfs store is already initialized") 107 } 108 109 // We do not care for migration when the ipfs store does not exist on the local file system 110 if ipfsExists { 111 preLog.Info("Migrating the IPFS storage to the latest version") 112 if err := ipfs.MigrateIpfsStorageVersion(preLog, ipfsDir); err != nil { 113 return fmt.Errorf("failed to migrate the ipfs version") 114 } 115 preLog.Info("Migrating the IPFS storage finished") 116 } else { 117 preLog.Info("IPFS store not initialized. Migration not needed") 118 } 119 } 120 121 initialisedFromNetworkHistory := false 122 if l.conf.NetworkHistory.Enabled { 123 preLog.Info("Initializing Network History") 124 125 if l.conf.AutoInitialiseFromNetworkHistory { 126 if err := networkhistory.KillAllConnectionsToDatabase(l.ctx, l.conf.SQLStore.ConnectionConfig); err != nil { 127 return fmt.Errorf("failed to kill all connections to database: %w", err) 128 } 129 } 130 131 err = l.initialiseNetworkHistory(preLog, l.conf.SQLStore.ConnectionConfig) 132 if err != nil { 133 l.Log.Error("Failed to initialise network history", logging.Error(err)) 134 return fmt.Errorf("failed to initialise network history:%w", err) 135 } 136 137 if l.conf.AutoInitialiseFromNetworkHistory { 138 preLog.Info("Auto Initialising Datanode From Network History") 139 apiPorts := []int{l.conf.API.Port} 140 apiPorts = append(apiPorts, l.conf.NetworkHistory.Initialise.GrpcAPIPorts...) 141 142 if err = networkhistory.InitialiseDatanodeFromNetworkHistory(l.ctx, l.conf.NetworkHistory.Initialise, 143 preLog, l.conf.SQLStore.ConnectionConfig, l.networkHistoryService, apiPorts, 144 bool(l.conf.SQLStore.VerboseMigration)); err != nil { 145 return fmt.Errorf("failed to initialize datanode from network history: %w", err) 146 } 147 148 initialisedFromNetworkHistory = true 149 preLog.Info("Initialized from network history") 150 } 151 } 152 153 if !initialisedFromNetworkHistory { 154 operation := func() (opErr error) { 155 preLog.Info("Attempting to initialise database...") 156 opErr = l.initialiseDatabase(preLog) 157 if opErr != nil { 158 preLog.Error("Failed to initialise database, retrying...", logging.Error(opErr)) 159 } 160 preLog.Info("Database initialised") 161 return opErr 162 } 163 164 retryConfig := l.conf.SQLStore.ConnectionRetryConfig 165 166 expBackoff := backoff.NewExponentialBackOff() 167 expBackoff.InitialInterval = retryConfig.InitialInterval 168 expBackoff.MaxInterval = retryConfig.MaxInterval 169 expBackoff.MaxElapsedTime = retryConfig.MaxElapsedTime 170 171 err = backoff.Retry(operation, backoff.WithMaxRetries(expBackoff, retryConfig.MaxRetries)) 172 if err != nil { 173 return fmt.Errorf("failed to connect to database: %w", err) 174 } 175 } 176 177 preLog.Info("Applying Data Retention Policies") 178 179 err = sqlstore.ApplyDataRetentionPolicies(l.conf.SQLStore, preLog) 180 if err != nil { 181 return fmt.Errorf("failed to apply data retention policies:%w", err) 182 } 183 184 // check that the schema version matches the latest migration, because if it doesn't queries might fail if rows/tables 185 // it expects to exist don't 186 if err := sqlstore.CheckSchemaVersionsSynced(l.Log, conf.SQLStore.ConnectionConfig, sqlstore.EmbedMigrations); err != nil { 187 return err 188 } 189 190 preLog.Info("Enabling SQL stores") 191 192 l.transactionalConnectionSource, err = sqlstore.NewTransactionalConnectionSource(l.ctx, preLog, l.conf.SQLStore.ConnectionConfig) 193 if err != nil { 194 return fmt.Errorf("failed to create transactional connection source: %w", err) 195 } 196 197 logSqlstore := l.Log.Named("sqlstore") 198 l.CreateAllStores(l.ctx, logSqlstore, l.transactionalConnectionSource, l.conf.CandlesV2.CandleStore) 199 200 logService := l.Log.Named("service") 201 logService.SetLevel(l.conf.Service.Level.Get()) 202 if err := l.SetupServices(l.ctx, logService, l.conf.Service, l.conf.CandlesV2); err != nil { 203 return err 204 } 205 206 err = networkhistory.VerifyChainID(l.conf.ChainID, l.chainService) 207 if err != nil { 208 return fmt.Errorf("failed to verify chain id:%w", err) 209 } 210 211 l.SetupSQLSubscribers() 212 213 return nil 214 } 215 216 func (l *NodeCommand) initialiseDatabase(preLog *logging.Logger) error { 217 var err error 218 conf := l.conf.SQLStore.ConnectionConfig 219 conf.MaxConnPoolSize = 1 220 pool, err := sqlstore.CreateConnectionPool(l.ctx, conf) 221 if err != nil { 222 return fmt.Errorf("failed to create connection pool: %w", err) 223 } 224 defer pool.Close() 225 226 hasVegaSchema, err := sqlstore.HasVegaSchema(l.ctx, pool) 227 if err != nil { 228 return fmt.Errorf("failed to check if database has schema: %w", err) 229 } 230 231 // If it's an empty database, recreate it with correct locale settings 232 if !hasVegaSchema { 233 err = sqlstore.RecreateVegaDatabase(l.ctx, preLog, l.conf.SQLStore.ConnectionConfig) 234 if err != nil { 235 return fmt.Errorf("failed to recreate vega schema: %w", err) 236 } 237 } 238 239 err = sqlstore.MigrateToLatestSchema(preLog, l.conf.SQLStore) 240 if err != nil { 241 return fmt.Errorf("failed to migrate to latest schema:%w", err) 242 } 243 244 return nil 245 } 246 247 // we've already set everything up WRT arguments etc... just bootstrap the node. 248 func (l *NodeCommand) preRun([]string) (err error) { 249 // ensure that context is cancelled if we return an error here 250 defer func() { 251 if err != nil { 252 l.cancel() 253 } 254 }() 255 256 preLog := l.Log.Named("start.preRun") 257 brokerLog := l.Log.Named("broker") 258 eventSourceLog := brokerLog.Named("eventsource") 259 260 eventReceiverSender, err := broker.NewEventReceiverSender(l.conf.Broker, eventSourceLog, l.conf.ChainID) 261 if err != nil { 262 preLog.Error("unable to initialise event source", logging.Error(err)) 263 return err 264 } 265 266 var rawEventSource broker.RawEventReceiver = eventReceiverSender 267 268 if l.conf.Broker.UseBufferedEventSource { 269 bufferFilePath, err := l.vegaPaths.CreateStatePathFor(paths.DataNodeEventBufferHome) 270 if err != nil { 271 preLog.Error("failed to create path for buffered event source", logging.Error(err)) 272 return err 273 } 274 275 archiveFilesPath, err := l.vegaPaths.CreateStatePathFor(paths.DataNodeArchivedEventBufferHome) 276 if err != nil { 277 l.Log.Error("failed to create archive path for buffered event source", logging.Error(err)) 278 return err 279 } 280 281 rawEventSource, err = broker.NewBufferedEventSource(l.ctx, l.Log, l.conf.Broker.BufferedEventSourceConfig, eventReceiverSender, 282 bufferFilePath, archiveFilesPath) 283 if err != nil { 284 preLog.Error("unable to initialise file buffered event source", logging.Error(err)) 285 return err 286 } 287 } 288 289 var eventSource broker.EventReceiver 290 eventSource = broker.NewDeserializer(rawEventSource) 291 eventSource = broker.NewFanOutEventSource(eventSource, l.conf.SQLStore.FanOutBufferSize, 2) 292 293 var onBlockCommittedHandler func(ctx context.Context, chainId string, lastCommittedBlockHeight int64, snapshotTaken bool) 294 var protocolUpgradeHandler broker.ProtocolUpgradeHandler 295 296 if l.conf.NetworkHistory.Enabled { 297 blockCommitHandler := networkhistory.NewBlockCommitHandler(l.Log, l.conf.NetworkHistory, l.snapshotService.SnapshotData, 298 bool(l.conf.Broker.UseEventFile), l.conf.Broker.FileEventSourceConfig.TimeBetweenBlocks.Duration, 299 5*time.Second, 6) 300 onBlockCommittedHandler = blockCommitHandler.OnBlockCommitted 301 protocolUpgradeHandler = networkhistory.NewProtocolUpgradeHandler(l.Log, l.protocolUpgradeService, eventReceiverSender, 302 l.networkHistoryService.CreateAndPublishSegment) 303 } else { 304 onBlockCommittedHandler = func(ctx context.Context, chainId string, lastCommittedBlockHeight int64, snapshotTaken bool) {} 305 protocolUpgradeHandler = networkhistory.NewProtocolUpgradeHandler(l.Log, l.protocolUpgradeService, eventReceiverSender, 306 func(ctx context.Context, chainID string, toHeight int64) error { return nil }) 307 } 308 309 l.sqlBroker = broker.NewSQLStoreBroker(l.Log, l.conf.Broker, l.conf.ChainID, eventSource, 310 l.transactionalConnectionSource, 311 l.blockStore, 312 onBlockCommittedHandler, 313 protocolUpgradeHandler, 314 l.GetSQLSubscribers(), 315 ) 316 317 l.broker, err = broker.New(l.ctx, brokerLog, l.conf.Broker, l.conf.ChainID, eventSource) 318 if err != nil { 319 preLog.Error("unable to initialise broker", logging.Error(err)) 320 return err 321 } 322 323 // Event service as used by old and new world 324 l.eventService = subscribers.NewService(preLog, l.broker, l.conf.Broker.EventBusClientBufferSize) 325 326 nodeAddr := fmt.Sprintf("%v:%v", l.conf.API.CoreNodeIP, l.conf.API.CoreNodeGRPCPort) 327 conn, err := grpc.Dial(nodeAddr, grpc.WithInsecure()) 328 if err != nil { 329 return err 330 } 331 332 l.vegaCoreServiceClient = vegaprotoapi.NewCoreServiceClient(conn) 333 return nil 334 } 335 336 func (l *NodeCommand) initialiseNetworkHistory(preLog *logging.Logger, connConfig sqlstore.ConnectionConfig) error { 337 // Want to pre-allocate some connections to ensure a connection is always available, 338 // 3 is chosen to allow for the fact that pool size can temporarily drop below the min pool size. 339 connConfig.MaxConnPoolSize = 3 340 connConfig.MinConnPoolSize = 3 341 342 networkHistoryPool, err := sqlstore.CreateConnectionPool(l.ctx, connConfig) 343 if err != nil { 344 return fmt.Errorf("failed to create network history connection pool: %w", err) 345 } 346 347 preNetworkHistoryLog := preLog.Named("networkHistory") 348 networkHistoryLog := l.Log.Named("networkHistory") 349 networkHistoryLog.SetLevel(l.conf.NetworkHistory.Level.Get()) 350 351 snapshotServiceLog := networkHistoryLog.Named("snapshot") 352 networkHistoryServiceLog := networkHistoryLog.Named("service") 353 home := l.vegaPaths.StatePathFor(paths.DataNodeNetworkHistoryHome) 354 355 networkHistoryStore, err := store.New(l.ctx, networkHistoryServiceLog, l.conf.ChainID, l.conf.NetworkHistory.Store, home, 356 l.conf.MaxMemoryPercent) 357 if err != nil { 358 return fmt.Errorf("failed to create network history store: %w", err) 359 } 360 361 l.snapshotService, err = snapshot.NewSnapshotService(snapshotServiceLog, l.conf.NetworkHistory.Snapshot, 362 networkHistoryPool, networkHistoryStore, 363 l.vegaPaths.StatePathFor(paths.DataNodeNetworkHistorySnapshotCopyTo), func(version int64) error { 364 if err = sqlstore.MigrateUpToSchemaVersion(preNetworkHistoryLog, l.conf.SQLStore, version, sqlstore.EmbedMigrations); err != nil { 365 return fmt.Errorf("failed to migrate up to schema version %d: %w", version, err) 366 } 367 return nil 368 }, 369 func(version int64) error { 370 if err = sqlstore.MigrateDownToSchemaVersion(preNetworkHistoryLog, l.conf.SQLStore, version, sqlstore.EmbedMigrations); err != nil { 371 return fmt.Errorf("failed to migrate down to schema version %d: %w", version, err) 372 } 373 return nil 374 }) 375 if err != nil { 376 return fmt.Errorf("failed to create snapshot service:%w", err) 377 } 378 379 l.networkHistoryService, err = networkhistory.New(l.ctx, networkHistoryServiceLog, l.conf.ChainID, l.conf.NetworkHistory, 380 networkHistoryPool, 381 l.snapshotService, 382 networkHistoryStore, 383 l.conf.API.Port, 384 l.vegaPaths.StatePathFor(paths.DataNodeNetworkHistorySnapshotCopyTo)) 385 if err != nil { 386 return fmt.Errorf("failed to create networkHistory service:%w", err) 387 } 388 389 return nil 390 } 391 392 func ResetDatabaseAndNetworkHistory(ctx context.Context, log *logging.Logger, vegaPaths paths.Paths, connConfig sqlstore.ConnectionConfig) error { 393 err := os.RemoveAll(vegaPaths.StatePathFor(paths.DataNodeNetworkHistoryHome)) 394 if err != nil { 395 return fmt.Errorf("failed to remove network history dir: %w", err) 396 } 397 398 log.Info("Wiped all network history") 399 400 if err := sqlstore.RecreateVegaDatabase(ctx, log, connConfig); err != nil { 401 return fmt.Errorf("failed to wipe database:%w", err) 402 } 403 log.Info("Wiped all existing data from the database") 404 return nil 405 }