github.com/weaviate/weaviate@v1.24.6/adapters/handlers/rest/configure_api.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package rest
    13  
    14  import (
    15  	"context"
    16  	"encoding/json"
    17  	"fmt"
    18  	"net"
    19  	"net/http"
    20  	"os"
    21  	goruntime "runtime"
    22  	"runtime/debug"
    23  	"strings"
    24  	"time"
    25  
    26  	enterrors "github.com/weaviate/weaviate/entities/errors"
    27  
    28  	_ "net/http/pprof"
    29  
    30  	"github.com/KimMachineGun/automemlimit/memlimit"
    31  	openapierrors "github.com/go-openapi/errors"
    32  	"github.com/go-openapi/runtime"
    33  	"github.com/go-openapi/swag"
    34  	"github.com/pbnjay/memory"
    35  	"github.com/pkg/errors"
    36  	"github.com/prometheus/client_golang/prometheus/promhttp"
    37  	"github.com/sirupsen/logrus"
    38  	"github.com/weaviate/weaviate/adapters/clients"
    39  	"github.com/weaviate/weaviate/adapters/handlers/rest/clusterapi"
    40  	"github.com/weaviate/weaviate/adapters/handlers/rest/operations"
    41  	"github.com/weaviate/weaviate/adapters/handlers/rest/state"
    42  	"github.com/weaviate/weaviate/adapters/repos/classifications"
    43  	"github.com/weaviate/weaviate/adapters/repos/db"
    44  	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
    45  	modulestorage "github.com/weaviate/weaviate/adapters/repos/modules"
    46  	schemarepo "github.com/weaviate/weaviate/adapters/repos/schema"
    47  	txstore "github.com/weaviate/weaviate/adapters/repos/transactions"
    48  	"github.com/weaviate/weaviate/entities/moduletools"
    49  	"github.com/weaviate/weaviate/entities/replication"
    50  	vectorIndex "github.com/weaviate/weaviate/entities/vectorindex"
    51  	modstgazure "github.com/weaviate/weaviate/modules/backup-azure"
    52  	modstgfs "github.com/weaviate/weaviate/modules/backup-filesystem"
    53  	modstggcs "github.com/weaviate/weaviate/modules/backup-gcs"
    54  	modstgs3 "github.com/weaviate/weaviate/modules/backup-s3"
    55  	modgenerativeanyscale "github.com/weaviate/weaviate/modules/generative-anyscale"
    56  	modgenerativeaws "github.com/weaviate/weaviate/modules/generative-aws"
    57  	modgenerativecohere "github.com/weaviate/weaviate/modules/generative-cohere"
    58  	modgenerativemistral "github.com/weaviate/weaviate/modules/generative-mistral"
    59  	modgenerativeopenai "github.com/weaviate/weaviate/modules/generative-openai"
    60  	modgenerativepalm "github.com/weaviate/weaviate/modules/generative-palm"
    61  	modimage "github.com/weaviate/weaviate/modules/img2vec-neural"
    62  	modbind "github.com/weaviate/weaviate/modules/multi2vec-bind"
    63  	modclip "github.com/weaviate/weaviate/modules/multi2vec-clip"
    64  	modmulti2vecpalm "github.com/weaviate/weaviate/modules/multi2vec-palm"
    65  	modner "github.com/weaviate/weaviate/modules/ner-transformers"
    66  	modqnaopenai "github.com/weaviate/weaviate/modules/qna-openai"
    67  	modqna "github.com/weaviate/weaviate/modules/qna-transformers"
    68  	modcentroid "github.com/weaviate/weaviate/modules/ref2vec-centroid"
    69  	modrerankercohere "github.com/weaviate/weaviate/modules/reranker-cohere"
    70  	modrerankertransformers "github.com/weaviate/weaviate/modules/reranker-transformers"
    71  	modsum "github.com/weaviate/weaviate/modules/sum-transformers"
    72  	modspellcheck "github.com/weaviate/weaviate/modules/text-spellcheck"
    73  	modtext2vecaws "github.com/weaviate/weaviate/modules/text2vec-aws"
    74  	modcohere "github.com/weaviate/weaviate/modules/text2vec-cohere"
    75  	modcontextionary "github.com/weaviate/weaviate/modules/text2vec-contextionary"
    76  	modgpt4all "github.com/weaviate/weaviate/modules/text2vec-gpt4all"
    77  	modhuggingface "github.com/weaviate/weaviate/modules/text2vec-huggingface"
    78  	modjinaai "github.com/weaviate/weaviate/modules/text2vec-jinaai"
    79  	modopenai "github.com/weaviate/weaviate/modules/text2vec-openai"
    80  	modtext2vecpalm "github.com/weaviate/weaviate/modules/text2vec-palm"
    81  	modtransformers "github.com/weaviate/weaviate/modules/text2vec-transformers"
    82  	modvoyageai "github.com/weaviate/weaviate/modules/text2vec-voyageai"
    83  	"github.com/weaviate/weaviate/usecases/auth/authentication/composer"
    84  	"github.com/weaviate/weaviate/usecases/backup"
    85  	"github.com/weaviate/weaviate/usecases/classification"
    86  	"github.com/weaviate/weaviate/usecases/cluster"
    87  	"github.com/weaviate/weaviate/usecases/config"
    88  	"github.com/weaviate/weaviate/usecases/modules"
    89  	"github.com/weaviate/weaviate/usecases/monitoring"
    90  	"github.com/weaviate/weaviate/usecases/objects"
    91  	"github.com/weaviate/weaviate/usecases/replica"
    92  	"github.com/weaviate/weaviate/usecases/scaler"
    93  	schemaUC "github.com/weaviate/weaviate/usecases/schema"
    94  	"github.com/weaviate/weaviate/usecases/schema/migrate"
    95  	"github.com/weaviate/weaviate/usecases/sharding"
    96  	"github.com/weaviate/weaviate/usecases/telemetry"
    97  	"github.com/weaviate/weaviate/usecases/traverser"
    98  )
    99  
// MinimumRequiredContextionaryVersion is the version string "1.0.2".
// NOTE(review): judging by the name this is the lowest contextionary version
// the server accepts; it is not referenced in this part of the file, so
// confirm against the code that consumes it.
const MinimumRequiredContextionaryVersion = "1.0.2"
   101  
   102  func makeConfigureServer(appState *state.State) func(*http.Server, string, string) {
   103  	return func(s *http.Server, scheme, addr string) {
   104  		// Add properties to the config
   105  		appState.ServerConfig.Hostname = addr
   106  		appState.ServerConfig.Scheme = scheme
   107  	}
   108  }
   109  
   110  type vectorRepo interface {
   111  	objects.BatchVectorRepo
   112  	traverser.VectorSearcher
   113  	classification.VectorRepo
   114  	scaler.BackUpper
   115  	SetSchemaGetter(schemaUC.SchemaGetter)
   116  	WaitForStartup(ctx context.Context) error
   117  	Shutdown(ctx context.Context) error
   118  }
   119  
   120  func getCores() (int, error) {
   121  	cpuset, err := os.ReadFile("/sys/fs/cgroup/cpuset/cpuset.cpus")
   122  	if err != nil {
   123  		return 0, errors.Wrap(err, "read cpuset")
   124  	}
   125  
   126  	cores := strings.Split(strings.TrimSpace(string(cpuset)), ",")
   127  	return len(cores), nil
   128  }
   129  
   130  func MakeAppState(ctx context.Context, options *swag.CommandLineOptionsGroup) *state.State {
   131  	appState := startupRoutine(ctx, options)
   132  	setupGoProfiling(appState.ServerConfig.Config, appState.Logger)
   133  
   134  	if appState.ServerConfig.Config.Monitoring.Enabled {
   135  		// only monitoring tool supported at the moment is prometheus
   136  		enterrors.GoWrapper(func() {
   137  			mux := http.NewServeMux()
   138  			mux.Handle("/metrics", promhttp.Handler())
   139  			http.ListenAndServe(fmt.Sprintf(":%d", appState.ServerConfig.Config.Monitoring.Port), mux)
   140  		}, appState.Logger)
   141  	}
   142  
   143  	limitResources(appState)
   144  
   145  	err := registerModules(appState)
   146  	if err != nil {
   147  		appState.Logger.
   148  			WithField("action", "startup").WithError(err).
   149  			Fatal("modules didn't load")
   150  	}
   151  
   152  	// now that modules are loaded we can run the remaining config validation
   153  	// which is module dependent
   154  	if err := appState.ServerConfig.Config.Validate(appState.Modules); err != nil {
   155  		appState.Logger.
   156  			WithField("action", "startup").WithError(err).
   157  			Fatal("invalid config")
   158  	}
   159  
   160  	appState.ClusterHttpClient = reasonableHttpClient(appState.ServerConfig.Config.Cluster.AuthConfig)
   161  
   162  	var vectorRepo vectorRepo
   163  	var vectorMigrator migrate.Migrator
   164  	var migrator migrate.Migrator
   165  
   166  	if appState.ServerConfig.Config.Monitoring.Enabled {
   167  		promMetrics := monitoring.GetMetrics()
   168  		appState.Metrics = promMetrics
   169  	}
   170  
   171  	// TODO: configure http transport for efficient intra-cluster comm
   172  	remoteIndexClient := clients.NewRemoteIndex(appState.ClusterHttpClient)
   173  	remoteNodesClient := clients.NewRemoteNode(appState.ClusterHttpClient)
   174  	replicationClient := clients.NewReplicationClient(appState.ClusterHttpClient)
   175  	repo, err := db.New(appState.Logger, db.Config{
   176  		ServerVersion:             config.ServerVersion,
   177  		GitHash:                   config.GitHash,
   178  		MemtablesFlushDirtyAfter:  appState.ServerConfig.Config.Persistence.MemtablesFlushDirtyAfter,
   179  		MemtablesInitialSizeMB:    10,
   180  		MemtablesMaxSizeMB:        appState.ServerConfig.Config.Persistence.MemtablesMaxSizeMB,
   181  		MemtablesMinActiveSeconds: appState.ServerConfig.Config.Persistence.MemtablesMinActiveDurationSeconds,
   182  		MemtablesMaxActiveSeconds: appState.ServerConfig.Config.Persistence.MemtablesMaxActiveDurationSeconds,
   183  		RootPath:                  appState.ServerConfig.Config.Persistence.DataPath,
   184  		QueryLimit:                appState.ServerConfig.Config.QueryDefaults.Limit,
   185  		QueryMaximumResults:       appState.ServerConfig.Config.QueryMaximumResults,
   186  		QueryNestedRefLimit:       appState.ServerConfig.Config.QueryNestedCrossReferenceLimit,
   187  		MaxImportGoroutinesFactor: appState.ServerConfig.Config.MaxImportGoroutinesFactor,
   188  		TrackVectorDimensions:     appState.ServerConfig.Config.TrackVectorDimensions,
   189  		ResourceUsage:             appState.ServerConfig.Config.ResourceUsage,
   190  		AvoidMMap:                 appState.ServerConfig.Config.AvoidMmap,
   191  		DisableLazyLoadShards:     appState.ServerConfig.Config.DisableLazyLoadShards,
   192  		// Pass dummy replication config with minimum factor 1. Otherwise the
   193  		// setting is not backward-compatible. The user may have created a class
   194  		// with factor=1 before the change was introduced. Now their setup would no
   195  		// longer start up if the required minimum is now higher than 1. We want
   196  		// the required minimum to only apply to newly created classes - not block
   197  		// loading existing ones.
   198  		Replication: replication.GlobalConfig{MinimumFactor: 1},
   199  	}, remoteIndexClient, appState.Cluster, remoteNodesClient, replicationClient, appState.Metrics) // TODO client
   200  	if err != nil {
   201  		appState.Logger.
   202  			WithField("action", "startup").WithError(err).
   203  			Fatal("invalid new DB")
   204  	}
   205  
   206  	appState.DB = repo
   207  	vectorMigrator = db.NewMigrator(repo, appState.Logger)
   208  	vectorRepo = repo
   209  	migrator = vectorMigrator
   210  	explorer := traverser.NewExplorer(repo, appState.Logger, appState.Modules, traverser.NewMetrics(appState.Metrics), appState.ServerConfig.Config)
   211  	schemaRepo := schemarepo.NewStore(appState.ServerConfig.Config.Persistence.DataPath, appState.Logger)
   212  	if err = schemaRepo.Open(); err != nil {
   213  		appState.Logger.
   214  			WithField("action", "startup").WithError(err).
   215  			Fatal("could not initialize schema repo")
   216  		os.Exit(1)
   217  	}
   218  
   219  	localClassifierRepo, err := classifications.NewRepo(
   220  		appState.ServerConfig.Config.Persistence.DataPath, appState.Logger)
   221  	if err != nil {
   222  		appState.Logger.
   223  			WithField("action", "startup").WithError(err).
   224  			Fatal("could not initialize classifications repo")
   225  		os.Exit(1)
   226  	}
   227  
   228  	// TODO: configure http transport for efficient intra-cluster comm
   229  	classificationsTxClient := clients.NewClusterClassifications(appState.ClusterHttpClient)
   230  	classifierRepo := classifications.NewDistributeRepo(classificationsTxClient,
   231  		appState.Cluster, localClassifierRepo, appState.Logger)
   232  	appState.ClassificationRepo = classifierRepo
   233  
   234  	scaler := scaler.New(appState.Cluster, vectorRepo,
   235  		remoteIndexClient, appState.Logger, appState.ServerConfig.Config.Persistence.DataPath)
   236  	appState.Scaler = scaler
   237  
   238  	// TODO: configure http transport for efficient intra-cluster comm
   239  	schemaTxClient := clients.NewClusterSchema(appState.ClusterHttpClient)
   240  	schemaTxPersistence := txstore.NewStore(
   241  		appState.ServerConfig.Config.Persistence.DataPath, appState.Logger)
   242  	schemaTxPersistence.SetUmarshalFn(schemaUC.UnmarshalTransaction)
   243  	if err := schemaTxPersistence.Open(); err != nil {
   244  		appState.Logger.
   245  			WithField("action", "startup").WithError(err).
   246  			Fatal("could not open tx repo")
   247  		os.Exit(1)
   248  
   249  	}
   250  
   251  	schemaManager, err := schemaUC.NewManager(migrator, schemaRepo,
   252  		appState.Logger, appState.Authorizer, appState.ServerConfig.Config,
   253  		vectorIndex.ParseAndValidateConfig, appState.Modules, inverted.ValidateConfig,
   254  		appState.Modules, appState.Cluster, schemaTxClient,
   255  		schemaTxPersistence, scaler,
   256  	)
   257  	if err != nil {
   258  		appState.Logger.
   259  			WithField("action", "startup").WithError(err).
   260  			Fatal("could not initialize schema manager")
   261  		os.Exit(1)
   262  	}
   263  
   264  	appState.SchemaManager = schemaManager
   265  
   266  	appState.RemoteIndexIncoming = sharding.NewRemoteIndexIncoming(repo)
   267  	appState.RemoteNodeIncoming = sharding.NewRemoteNodeIncoming(repo)
   268  	appState.RemoteReplicaIncoming = replica.NewRemoteReplicaIncoming(repo)
   269  
   270  	backupManager := backup.NewHandler(appState.Logger, appState.Authorizer,
   271  		schemaManager, repo, appState.Modules)
   272  	appState.BackupManager = backupManager
   273  
   274  	enterrors.GoWrapper(func() { clusterapi.Serve(appState) }, appState.Logger)
   275  
   276  	vectorRepo.SetSchemaGetter(schemaManager)
   277  	explorer.SetSchemaGetter(schemaManager)
   278  	appState.Modules.SetSchemaGetter(schemaManager)
   279  
   280  	err = vectorRepo.WaitForStartup(ctx)
   281  	if err != nil {
   282  		appState.Logger.
   283  			WithError(err).
   284  			WithField("action", "startup").
   285  			Fatal("db didn't start up")
   286  		os.Exit(1)
   287  	}
   288  
   289  	if err := schemaManager.StartServing(ctx); err != nil {
   290  		appState.Logger.
   291  			WithError(err).
   292  			WithField("action", "startup").
   293  			Fatal("schema manager: resume dangling txs")
   294  		os.Exit(1)
   295  
   296  	}
   297  
   298  	batchManager := objects.NewBatchManager(vectorRepo, appState.Modules,
   299  		appState.Locks, schemaManager, appState.ServerConfig, appState.Logger,
   300  		appState.Authorizer, appState.Metrics)
   301  	appState.BatchManager = batchManager
   302  	objectsTraverser := traverser.NewTraverser(appState.ServerConfig, appState.Locks,
   303  		appState.Logger, appState.Authorizer, vectorRepo, explorer, schemaManager,
   304  		appState.Modules, traverser.NewMetrics(appState.Metrics),
   305  		appState.ServerConfig.Config.MaximumConcurrentGetRequests)
   306  	appState.Traverser = objectsTraverser
   307  
   308  	updateSchemaCallback := makeUpdateSchemaCall(appState.Logger, appState, objectsTraverser)
   309  	schemaManager.RegisterSchemaUpdateCallback(updateSchemaCallback)
   310  
   311  	err = migrator.AdjustFilterablePropSettings(ctx)
   312  	if err != nil {
   313  		appState.Logger.
   314  			WithError(err).
   315  			WithField("action", "adjustFilterablePropSettings").
   316  			Fatal("migration failed")
   317  		os.Exit(1)
   318  	}
   319  
   320  	// FIXME to avoid import cycles, tasks are passed as strings
   321  	reindexTaskNames := []string{}
   322  	var reindexCtx context.Context
   323  	reindexCtx, appState.ReindexCtxCancel = context.WithCancel(context.Background())
   324  	reindexFinished := make(chan error, 1)
   325  
   326  	if appState.ServerConfig.Config.ReindexSetToRoaringsetAtStartup {
   327  		reindexTaskNames = append(reindexTaskNames, "ShardInvertedReindexTaskSetToRoaringSet")
   328  	}
   329  	if appState.ServerConfig.Config.IndexMissingTextFilterableAtStartup {
   330  		reindexTaskNames = append(reindexTaskNames, "ShardInvertedReindexTaskMissingTextFilterable")
   331  	}
   332  	if len(reindexTaskNames) > 0 {
   333  		// start reindexing inverted indexes (if requested by user) in the background
   334  		// allowing db to complete api configuration and start handling requests
   335  		enterrors.GoWrapper(func() {
   336  			appState.Logger.
   337  				WithField("action", "startup").
   338  				Info("Reindexing inverted indexes")
   339  			reindexFinished <- migrator.InvertedReindex(reindexCtx, reindexTaskNames...)
   340  		}, appState.Logger)
   341  	}
   342  
   343  	configureServer = makeConfigureServer(appState)
   344  
   345  	// while we accept an overall longer startup, e.g. due to a recovery, we
   346  	// still want to limit the module startup context, as that's mostly service
   347  	// discovery / dependency checking
   348  	moduleCtx, cancel := context.WithTimeout(ctx, 120*time.Second)
   349  	defer cancel()
   350  
   351  	err = initModules(moduleCtx, appState)
   352  	if err != nil {
   353  		appState.Logger.
   354  			WithField("action", "startup").WithError(err).
   355  			Fatal("modules didn't initialize")
   356  	}
   357  
   358  	// manually update schema once
   359  	schema := schemaManager.GetSchemaSkipAuth()
   360  	updateSchemaCallback(schema)
   361  
   362  	// Add dimensions to all the objects in the database, if requested by the user
   363  	if appState.ServerConfig.Config.ReindexVectorDimensionsAtStartup {
   364  		appState.Logger.
   365  			WithField("action", "startup").
   366  			Info("Reindexing dimensions")
   367  		migrator.RecalculateVectorDimensions(ctx)
   368  	}
   369  
   370  	// Add recount properties of all the objects in the database, if requested by the user
   371  	if appState.ServerConfig.Config.RecountPropertiesAtStartup {
   372  		migrator.RecountProperties(ctx)
   373  	}
   374  
   375  	return appState
   376  }
   377  
   378  func configureAPI(api *operations.WeaviateAPI) http.Handler {
   379  	ctx := context.Background()
   380  	ctx, cancel := context.WithTimeout(ctx, 60*time.Minute)
   381  	defer cancel()
   382  
   383  	config.ServerVersion = parseVersionFromSwaggerSpec()
   384  	appState := MakeAppState(ctx, connectorOptionGroup)
   385  
   386  	api.ServeError = openapierrors.ServeError
   387  
   388  	api.JSONConsumer = runtime.JSONConsumer()
   389  
   390  	api.OidcAuth = composer.New(
   391  		appState.ServerConfig.Config.Authentication,
   392  		appState.APIKey, appState.OIDC)
   393  
   394  	api.Logger = func(msg string, args ...interface{}) {
   395  		appState.Logger.WithField("action", "restapi_management").Infof(msg, args...)
   396  	}
   397  
   398  	classifier := classification.New(appState.SchemaManager, appState.ClassificationRepo, appState.DB, // the DB is the vectorrepo
   399  		appState.Authorizer,
   400  		appState.Logger, appState.Modules)
   401  
   402  	setupSchemaHandlers(api, appState.SchemaManager, appState.Metrics, appState.Logger)
   403  	objectsManager := objects.NewManager(appState.Locks,
   404  		appState.SchemaManager, appState.ServerConfig, appState.Logger,
   405  		appState.Authorizer, appState.DB, appState.Modules,
   406  		objects.NewMetrics(appState.Metrics))
   407  	setupObjectHandlers(api, objectsManager, appState.ServerConfig.Config, appState.Logger,
   408  		appState.Modules, appState.Metrics)
   409  	setupObjectBatchHandlers(api, appState.BatchManager, appState.Metrics, appState.Logger)
   410  	setupGraphQLHandlers(api, appState, appState.SchemaManager, appState.ServerConfig.Config.DisableGraphQL,
   411  		appState.Metrics, appState.Logger)
   412  	setupMiscHandlers(api, appState.ServerConfig, appState.SchemaManager, appState.Modules,
   413  		appState.Metrics, appState.Logger)
   414  	setupClassificationHandlers(api, classifier, appState.Metrics, appState.Logger)
   415  	backupScheduler := backup.NewScheduler(
   416  		appState.Authorizer,
   417  		clients.NewClusterBackups(appState.ClusterHttpClient),
   418  		appState.DB, appState.Modules,
   419  		appState.Cluster,
   420  		appState.Logger)
   421  	setupBackupHandlers(api, backupScheduler, appState.Metrics, appState.Logger)
   422  	setupNodesHandlers(api, appState.SchemaManager, appState.DB, appState)
   423  
   424  	grpcServer := createGrpcServer(appState)
   425  	setupMiddlewares := makeSetupMiddlewares(appState)
   426  	setupGlobalMiddleware := makeSetupGlobalMiddleware(appState)
   427  
   428  	telemeter := telemetry.New(appState.DB, appState.Modules, appState.Logger)
   429  	if telemetryEnabled(appState) {
   430  		enterrors.GoWrapper(func() {
   431  			if err := telemeter.Start(context.Background()); err != nil {
   432  				appState.Logger.
   433  					WithField("action", "startup").
   434  					Errorf("telemetry failed to start: %s", err.Error())
   435  			}
   436  		}, appState.Logger)
   437  	}
   438  
   439  	api.ServerShutdown = func() {
   440  		if telemetryEnabled(appState) {
   441  			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   442  			defer cancel()
   443  			// must be shutdown before the db, to ensure the
   444  			// termination payload contains the correct
   445  			// object count
   446  			if err := telemeter.Stop(ctx); err != nil {
   447  				appState.Logger.WithField("action", "stop_telemetry").
   448  					Errorf("failed to stop telemetry: %s", err.Error())
   449  			}
   450  		}
   451  
   452  		// stop reindexing on server shutdown
   453  		appState.ReindexCtxCancel()
   454  
   455  		// gracefully stop gRPC server
   456  		grpcServer.GracefulStop()
   457  
   458  		ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
   459  		defer cancel()
   460  
   461  		if err := appState.SchemaManager.Shutdown(ctx); err != nil {
   462  			panic(err)
   463  		}
   464  
   465  		if err := appState.DB.Shutdown(ctx); err != nil {
   466  			panic(err)
   467  		}
   468  	}
   469  
   470  	startGrpcServer(grpcServer, appState)
   471  
   472  	return setupGlobalMiddleware(api.Serve(setupMiddlewares))
   473  }
   474  
   475  // TODO: Split up and don't write into global variables. Instead return an appState
   476  func startupRoutine(ctx context.Context, options *swag.CommandLineOptionsGroup) *state.State {
   477  	appState := &state.State{}
   478  
   479  	logger := logger()
   480  	appState.Logger = logger
   481  
   482  	logger.WithField("action", "startup").WithField("startup_time_left", timeTillDeadline(ctx)).
   483  		Debug("created startup context, nothing done so far")
   484  
   485  	// Load the config using the flags
   486  	serverConfig := &config.WeaviateConfig{}
   487  	appState.ServerConfig = serverConfig
   488  	err := serverConfig.LoadConfig(options, logger)
   489  	if err != nil {
   490  		logger.WithField("action", "startup").WithError(err).Error("could not load config")
   491  		logger.Exit(1)
   492  	}
   493  	dataPath := serverConfig.Config.Persistence.DataPath
   494  	if err := os.MkdirAll(dataPath, 0o777); err != nil {
   495  		logger.WithField("action", "startup").
   496  			WithField("path", dataPath).Error("cannot create data directory")
   497  		logger.Exit(1)
   498  	}
   499  
   500  	monitoring.InitConfig(serverConfig.Config.Monitoring)
   501  
   502  	if serverConfig.Config.DisableGraphQL {
   503  		logger.WithFields(logrus.Fields{
   504  			"action":          "startup",
   505  			"disable_graphql": true,
   506  		}).Warnf("GraphQL API disabled, relying only on gRPC API for querying. " +
   507  			"This is considered experimental and will likely experience breaking changes " +
   508  			"before reaching general availability")
   509  	}
   510  
   511  	logger.WithFields(logrus.Fields{
   512  		"action":                    "startup",
   513  		"default_vectorizer_module": serverConfig.Config.DefaultVectorizerModule,
   514  	}).Infof("the default vectorizer modules is set to %q, as a result all new "+
   515  		"schema classes without an explicit vectorizer setting, will use this "+
   516  		"vectorizer", serverConfig.Config.DefaultVectorizerModule)
   517  
   518  	logger.WithFields(logrus.Fields{
   519  		"action":              "startup",
   520  		"auto_schema_enabled": serverConfig.Config.AutoSchema.Enabled,
   521  	}).Infof("auto schema enabled setting is set to \"%v\"", serverConfig.Config.AutoSchema.Enabled)
   522  
   523  	logger.WithField("action", "startup").WithField("startup_time_left", timeTillDeadline(ctx)).
   524  		Debug("config loaded")
   525  
   526  	appState.OIDC = configureOIDC(appState)
   527  	appState.APIKey = configureAPIKey(appState)
   528  	appState.AnonymousAccess = configureAnonymousAccess(appState)
   529  	appState.Authorizer = configureAuthorizer(appState)
   530  
   531  	logger.WithField("action", "startup").WithField("startup_time_left", timeTillDeadline(ctx)).
   532  		Debug("configured OIDC and anonymous access client")
   533  
   534  	appState.Locks = &dummyLock{}
   535  
   536  	logger.WithField("action", "startup").WithField("startup_time_left", timeTillDeadline(ctx)).
   537  		Debug("initialized schema")
   538  
   539  	clusterState, err := cluster.Init(serverConfig.Config.Cluster, dataPath, logger)
   540  	if err != nil {
   541  		logger.WithField("action", "startup").WithError(err).
   542  			Error("could not init cluster state")
   543  		logger.Exit(1)
   544  	}
   545  
   546  	appState.Cluster = clusterState
   547  
   548  	appState.Logger.
   549  		WithField("action", "startup").
   550  		Debug("startup routine complete")
   551  
   552  	return appState
   553  }
   554  
   555  // logger does not parse the regular config object, as logging needs to be
   556  // configured before the configuration is even loaded/parsed. We are thus
   557  // "manually" reading the desired env vars and set reasonable defaults if they
   558  // are not set.
   559  //
   560  // Defaults to log level info and json format
   561  func logger() *logrus.Logger {
   562  	logger := logrus.New()
   563  	if os.Getenv("LOG_FORMAT") != "text" {
   564  		logger.SetFormatter(&logrus.JSONFormatter{})
   565  	}
   566  	switch os.Getenv("LOG_LEVEL") {
   567  	case "panic":
   568  		logger.SetLevel(logrus.PanicLevel)
   569  	case "fatal":
   570  		logger.SetLevel(logrus.FatalLevel)
   571  	case "error":
   572  		logger.SetLevel(logrus.ErrorLevel)
   573  	case "warn":
   574  		logger.SetLevel(logrus.WarnLevel)
   575  	case "warning":
   576  		logger.SetLevel(logrus.WarnLevel)
   577  	case "debug":
   578  		logger.SetLevel(logrus.DebugLevel)
   579  	case "trace":
   580  		logger.SetLevel(logrus.TraceLevel)
   581  	default:
   582  		logger.SetLevel(logrus.InfoLevel)
   583  	}
   584  
   585  	return logger
   586  }
   587  
   588  type dummyLock struct{}
   589  
   590  func (d *dummyLock) LockConnector() (func() error, error) {
   591  	return func() error { return nil }, nil
   592  }
   593  
   594  func (d *dummyLock) LockSchema() (func() error, error) {
   595  	return func() error { return nil }, nil
   596  }
   597  
   598  // everything hard-coded right now, to be made dynamic (from go plugins later)
   599  func registerModules(appState *state.State) error {
   600  	appState.Logger.
   601  		WithField("action", "startup").
   602  		Debug("start registering modules")
   603  
   604  	appState.Modules = modules.NewProvider()
   605  
   606  	enabledModules := map[string]bool{}
   607  	if len(appState.ServerConfig.Config.EnableModules) > 0 {
   608  		modules := strings.Split(appState.ServerConfig.Config.EnableModules, ",")
   609  		for _, module := range modules {
   610  			enabledModules[strings.TrimSpace(module)] = true
   611  		}
   612  	}
   613  
   614  	if _, ok := enabledModules["text2vec-contextionary"]; ok {
   615  		appState.Modules.Register(modcontextionary.New())
   616  		appState.Logger.
   617  			WithField("action", "startup").
   618  			WithField("module", "text2vec-contextionary").
   619  			Debug("enabled module")
   620  	}
   621  
   622  	if _, ok := enabledModules["text2vec-transformers"]; ok {
   623  		appState.Modules.Register(modtransformers.New())
   624  		appState.Logger.
   625  			WithField("action", "startup").
   626  			WithField("module", "text2vec-transformers").
   627  			Debug("enabled module")
   628  	}
   629  
   630  	if _, ok := enabledModules[modgpt4all.Name]; ok {
   631  		appState.Modules.Register(modgpt4all.New())
   632  		appState.Logger.
   633  			WithField("action", "startup").
   634  			WithField("module", modgpt4all.Name).
   635  			Debug("enabled module")
   636  	}
   637  
   638  	if _, ok := enabledModules[modrerankertransformers.Name]; ok {
   639  		appState.Modules.Register(modrerankertransformers.New())
   640  		appState.Logger.
   641  			WithField("action", "startup").
   642  			WithField("module", modrerankertransformers.Name).
   643  			Debug("enabled module")
   644  	}
   645  
   646  	if _, ok := enabledModules[modrerankercohere.Name]; ok {
   647  		appState.Modules.Register(modrerankercohere.New())
   648  		appState.Logger.
   649  			WithField("action", "startup").
   650  			WithField("module", modrerankercohere.Name).
   651  			Debug("enabled module")
   652  	}
   653  
   654  	if _, ok := enabledModules["qna-transformers"]; ok {
   655  		appState.Modules.Register(modqna.New())
   656  		appState.Logger.
   657  			WithField("action", "startup").
   658  			WithField("module", "qna-transformers").
   659  			Debug("enabled module")
   660  	}
   661  
   662  	if _, ok := enabledModules["sum-transformers"]; ok {
   663  		appState.Modules.Register(modsum.New())
   664  		appState.Logger.
   665  			WithField("action", "startup").
   666  			WithField("module", "sum-transformers").
   667  			Debug("enabled module")
   668  	}
   669  
   670  	if _, ok := enabledModules["img2vec-neural"]; ok {
   671  		appState.Modules.Register(modimage.New())
   672  		appState.Logger.
   673  			WithField("action", "startup").
   674  			WithField("module", "img2vec-neural").
   675  			Debug("enabled module")
   676  	}
   677  
   678  	if _, ok := enabledModules["ner-transformers"]; ok {
   679  		appState.Modules.Register(modner.New())
   680  		appState.Logger.
   681  			WithField("action", "startup").
   682  			WithField("module", "ner-transformers").
   683  			Debug("enabled module")
   684  	}
   685  
   686  	if _, ok := enabledModules["text-spellcheck"]; ok {
   687  		appState.Modules.Register(modspellcheck.New())
   688  		appState.Logger.
   689  			WithField("action", "startup").
   690  			WithField("module", "text-spellcheck").
   691  			Debug("enabled module")
   692  	}
   693  
   694  	if _, ok := enabledModules["multi2vec-clip"]; ok {
   695  		appState.Modules.Register(modclip.New())
   696  		appState.Logger.
   697  			WithField("action", "startup").
   698  			WithField("module", "multi2vec-clip").
   699  			Debug("enabled module")
   700  	}
   701  
   702  	if _, ok := enabledModules[modmulti2vecpalm.Name]; ok {
   703  		appState.Modules.Register(modmulti2vecpalm.New())
   704  		appState.Logger.
   705  			WithField("action", "startup").
   706  			WithField("module", modmulti2vecpalm.Name).
   707  			Debug("enabled module")
   708  	}
   709  
   710  	if _, ok := enabledModules["text2vec-openai"]; ok {
   711  		appState.Modules.Register(modopenai.New())
   712  		appState.Logger.
   713  			WithField("action", "startup").
   714  			WithField("module", "text2vec-openai").
   715  			Debug("enabled module")
   716  	}
   717  
   718  	if _, ok := enabledModules["qna-openai"]; ok {
   719  		appState.Modules.Register(modqnaopenai.New())
   720  		appState.Logger.
   721  			WithField("action", "startup").
   722  			WithField("module", "qna-openai").
   723  			Debug("enabled module")
   724  	}
   725  
   726  	if _, ok := enabledModules[modgenerativecohere.Name]; ok {
   727  		appState.Modules.Register(modgenerativecohere.New())
   728  		appState.Logger.
   729  			WithField("action", "startup").
   730  			WithField("module", modgenerativecohere.Name).
   731  			Debug("enabled module")
   732  	}
   733  
   734  	if _, ok := enabledModules[modgenerativemistral.Name]; ok {
   735  		appState.Modules.Register(modgenerativemistral.New())
   736  		appState.Logger.
   737  			WithField("action", "startup").
   738  			WithField("module", modgenerativemistral.Name).
   739  			Debug("enabled module")
   740  	}
   741  
   742  	if _, ok := enabledModules[modgenerativeopenai.Name]; ok {
   743  		appState.Modules.Register(modgenerativeopenai.New())
   744  		appState.Logger.
   745  			WithField("action", "startup").
   746  			WithField("module", modgenerativeopenai.Name).
   747  			Debug("enabled module")
   748  	}
   749  
   750  	if _, ok := enabledModules[modgenerativeaws.Name]; ok {
   751  		appState.Modules.Register(modgenerativeaws.New())
   752  		appState.Logger.
   753  			WithField("action", "startup").
   754  			WithField("module", modgenerativeaws.Name).
   755  			Debug("enabled module")
   756  	}
   757  
   758  	if _, ok := enabledModules[modhuggingface.Name]; ok {
   759  		appState.Modules.Register(modhuggingface.New())
   760  		appState.Logger.
   761  			WithField("action", "startup").
   762  			WithField("module", modhuggingface.Name).
   763  			Debug("enabled module")
   764  	}
   765  
   766  	if _, ok := enabledModules[modgenerativepalm.Name]; ok {
   767  		appState.Modules.Register(modgenerativepalm.New())
   768  		appState.Logger.
   769  			WithField("action", "startup").
   770  			WithField("module", modgenerativepalm.Name).
   771  			Debug("enabled module")
   772  	}
   773  
   774  	if _, ok := enabledModules[modgenerativeanyscale.Name]; ok {
   775  		appState.Modules.Register(modgenerativeanyscale.New())
   776  		appState.Logger.
   777  			WithField("action", "startup").
   778  			WithField("module", modgenerativeanyscale.Name).
   779  			Debug("enabled module")
   780  	}
   781  
   782  	if _, ok := enabledModules[modtext2vecpalm.Name]; ok {
   783  		appState.Modules.Register(modtext2vecpalm.New())
   784  		appState.Logger.
   785  			WithField("action", "startup").
   786  			WithField("module", modtext2vecpalm.Name).
   787  			Debug("enabled module")
   788  	}
   789  
   790  	if _, ok := enabledModules[modtext2vecaws.Name]; ok {
   791  		appState.Modules.Register(modtext2vecaws.New())
   792  		appState.Logger.
   793  			WithField("action", "startup").
   794  			WithField("module", modtext2vecaws.Name).
   795  			Debug("enabled module")
   796  	}
   797  
   798  	if _, ok := enabledModules[modstgfs.Name]; ok {
   799  		appState.Modules.Register(modstgfs.New())
   800  		appState.Logger.
   801  			WithField("action", "startup").
   802  			WithField("module", modstgfs.Name).
   803  			Debug("enabled module")
   804  	}
   805  
   806  	if _, ok := enabledModules[modstgs3.Name]; ok {
   807  		appState.Modules.Register(modstgs3.New())
   808  		appState.Logger.
   809  			WithField("action", "startup").
   810  			WithField("module", modstgs3.Name).
   811  			Debug("enabled module")
   812  	}
   813  
   814  	if _, ok := enabledModules[modstggcs.Name]; ok {
   815  		appState.Modules.Register(modstggcs.New())
   816  		appState.Logger.
   817  			WithField("action", "startup").
   818  			WithField("module", modstggcs.Name).
   819  			Debug("enabled module")
   820  	}
   821  
   822  	if _, ok := enabledModules[modstgazure.Name]; ok {
   823  		appState.Modules.Register(modstgazure.New())
   824  		appState.Logger.
   825  			WithField("action", "startup").
   826  			WithField("module", modstgazure.Name).
   827  			Debug("enabled module")
   828  	}
   829  
   830  	if _, ok := enabledModules[modcentroid.Name]; ok {
   831  		appState.Modules.Register(modcentroid.New())
   832  		appState.Logger.
   833  			WithField("action", "startup").
   834  			WithField("module", modcentroid.Name).
   835  			Debug("enabled module")
   836  	}
   837  
   838  	if _, ok := enabledModules[modcohere.Name]; ok {
   839  		appState.Modules.Register(modcohere.New())
   840  		appState.Logger.
   841  			WithField("action", "startup").
   842  			WithField("module", modcohere.Name).
   843  			Debug("enabled module")
   844  	}
   845  
   846  	if _, ok := enabledModules[modvoyageai.Name]; ok {
   847  		appState.Modules.Register(modvoyageai.New())
   848  		appState.Logger.
   849  			WithField("action", "startup").
   850  			WithField("module", modvoyageai.Name).
   851  			Debug("enabled module")
   852  	}
   853  
   854  	if _, ok := enabledModules[modbind.Name]; ok {
   855  		appState.Modules.Register(modbind.New())
   856  		appState.Logger.
   857  			WithField("action", "startup").
   858  			WithField("module", modbind.Name).
   859  			Debug("enabled module")
   860  	}
   861  
   862  	if _, ok := enabledModules[modjinaai.Name]; ok {
   863  		appState.Modules.Register(modjinaai.New())
   864  		appState.Logger.
   865  			WithField("action", "startup").
   866  			WithField("module", modjinaai.Name).
   867  			Debug("enabled module")
   868  	}
   869  
   870  	appState.Logger.
   871  		WithField("action", "startup").
   872  		Debug("completed registering modules")
   873  
   874  	return nil
   875  }
   876  
   877  func initModules(ctx context.Context, appState *state.State) error {
   878  	storageProvider, err := modulestorage.NewRepo(
   879  		appState.ServerConfig.Config.Persistence.DataPath, appState.Logger)
   880  	if err != nil {
   881  		return errors.Wrap(err, "init storage provider")
   882  	}
   883  
   884  	// TODO: gh-1481 don't pass entire appState in, but only what's needed. Probably only
   885  	// config?
   886  	moduleParams := moduletools.NewInitParams(storageProvider, appState,
   887  		appState.ServerConfig.Config, appState.Logger)
   888  
   889  	appState.Logger.
   890  		WithField("action", "startup").
   891  		Debug("start initializing modules")
   892  	if err := appState.Modules.Init(ctx, moduleParams, appState.Logger); err != nil {
   893  		return errors.Wrap(err, "init modules")
   894  	}
   895  
   896  	appState.Logger.
   897  		WithField("action", "startup").
   898  		Debug("finished initializing modules")
   899  
   900  	return nil
   901  }
   902  
// clientWithAuth is an http.RoundTripper that sets HTTP basic auth credentials
// on a request and then delegates it to the wrapped round tripper.
type clientWithAuth struct {
	// r is the underlying round tripper that actually performs the request.
	r http.RoundTripper
	// basicAuth holds the username and password set on every request.
	basicAuth cluster.BasicAuth
}
   907  
   908  func (c clientWithAuth) RoundTrip(r *http.Request) (*http.Response, error) {
   909  	r.SetBasicAuth(c.basicAuth.Username, c.basicAuth.Password)
   910  	return c.r.RoundTrip(r)
   911  }
   912  
   913  func reasonableHttpClient(authConfig cluster.AuthConfig) *http.Client {
   914  	t := &http.Transport{
   915  		Proxy: http.ProxyFromEnvironment,
   916  		DialContext: (&net.Dialer{
   917  			Timeout:   30 * time.Second,
   918  			KeepAlive: 120 * time.Second,
   919  		}).DialContext,
   920  		MaxIdleConnsPerHost:   100,
   921  		MaxIdleConns:          100,
   922  		IdleConnTimeout:       90 * time.Second,
   923  		TLSHandshakeTimeout:   10 * time.Second,
   924  		ExpectContinueTimeout: 1 * time.Second,
   925  	}
   926  	if authConfig.BasicAuth.Enabled() {
   927  		return &http.Client{Transport: clientWithAuth{r: t, basicAuth: authConfig.BasicAuth}}
   928  	}
   929  	return &http.Client{Transport: t}
   930  }
   931  
   932  func setupGoProfiling(config config.Config, logger logrus.FieldLogger) {
   933  	enterrors.GoWrapper(func() {
   934  		fmt.Println(http.ListenAndServe(":6060", nil))
   935  	}, logger)
   936  
   937  	if config.Profiling.BlockProfileRate > 0 {
   938  		goruntime.SetBlockProfileRate(config.Profiling.BlockProfileRate)
   939  	}
   940  
   941  	if config.Profiling.MutexProfileFraction > 0 {
   942  		goruntime.SetMutexProfileFraction(config.Profiling.MutexProfileFraction)
   943  	}
   944  }
   945  
   946  func parseVersionFromSwaggerSpec() string {
   947  	spec := struct {
   948  		Info struct {
   949  			Version string `json:"version"`
   950  		} `json:"info"`
   951  	}{}
   952  
   953  	err := json.Unmarshal(SwaggerJSON, &spec)
   954  	if err != nil {
   955  		panic(err)
   956  	}
   957  
   958  	return spec.Info.Version
   959  }
   960  
   961  func limitResources(appState *state.State) {
   962  	if os.Getenv("LIMIT_RESOURCES") == "true" {
   963  		appState.Logger.Info("Limiting resources:  memory: 80%, cores: all but one")
   964  		if os.Getenv("GOMAXPROCS") == "" {
   965  			// Fetch the number of cores from the cgroups cpuset
   966  			// and parse it into an int
   967  			cores, err := getCores()
   968  			if err == nil {
   969  				appState.Logger.WithField("cores", cores).
   970  					Warn("GOMAXPROCS not set, and unable to read from cgroups, setting to number of cores")
   971  				goruntime.GOMAXPROCS(cores)
   972  			} else {
   973  				cores = goruntime.NumCPU() - 1
   974  				if cores > 0 {
   975  					appState.Logger.WithField("cores", cores).
   976  						Warnf("Unable to read from cgroups: %v, setting to max cores to: %v", err, cores)
   977  					goruntime.GOMAXPROCS(cores)
   978  				}
   979  			}
   980  		}
   981  
   982  		limit, err := memlimit.SetGoMemLimit(0.8)
   983  		if err != nil {
   984  			appState.Logger.WithError(err).Warnf("Unable to set memory limit from cgroups: %v", err)
   985  			// Set memory limit to 90% of the available memory
   986  			limit := int64(float64(memory.TotalMemory()) * 0.8)
   987  			debug.SetMemoryLimit(limit)
   988  			appState.Logger.WithField("limit", limit).Info("Set memory limit based on available memory")
   989  		} else {
   990  			appState.Logger.WithField("limit", limit).Info("Set memory limit")
   991  		}
   992  	} else {
   993  		appState.Logger.Info("No resource limits set, weaviate will use all available memory and CPU. " +
   994  			"To limit resources, set LIMIT_RESOURCES=true")
   995  	}
   996  }
   997  
   998  func telemetryEnabled(state *state.State) bool {
   999  	return !state.ServerConfig.Config.DisableTelemetry
  1000  }