github.com/weaviate/weaviate@v1.24.6/usecases/config/config_handler.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package config
    13  
    14  import (
    15  	"encoding/json"
    16  	"fmt"
    17  	"os"
    18  	"regexp"
    19  	"time"
    20  
    21  	"github.com/go-openapi/swag"
    22  	"github.com/pkg/errors"
    23  	"github.com/sirupsen/logrus"
    24  	"github.com/weaviate/weaviate/deprecations"
    25  	"github.com/weaviate/weaviate/entities/replication"
    26  	"github.com/weaviate/weaviate/entities/schema"
    27  	"github.com/weaviate/weaviate/entities/vectorindex/common"
    28  	"github.com/weaviate/weaviate/usecases/cluster"
    29  	"gopkg.in/yaml.v2"
    30  )
    31  
// ServerVersion is set when the misc handlers are set up.
// At that point the entire swagger spec is already being
// parsed for the server version, so setting ServerVersion
// there means the spec only needs to be parsed once.
// It is the empty string until then.
var ServerVersion string

// GitHash keeps the git commit hash of the current build.
// It stays "unknown" unless it is overwritten.
var GitHash = "unknown"

// DefaultConfigFile is the config file path used when no config file is
// provided on the command line.
const DefaultConfigFile string = "./weaviate.conf.json"

// DefaultCleanupIntervalSeconds can be overwritten on a per-class basis
const DefaultCleanupIntervalSeconds = int64(60)

const (
	// These BM25 tuning params can be overwritten on a per-class basis
	DefaultBM25k1 = float32(1.2)
	DefaultBM25b  = float32(0.75)
)

// Defaults for import concurrency and for the resource-usage thresholds.
// NOTE(review): presumably these back MaxImportGoroutinesFactor and the
// DiskUse/MemUse percentages when nothing is configured — confirm against
// the code that applies them.
const (
	DefaultMaxImportGoroutinesFactor = float64(1.5)

	DefaultDiskUseWarningPercentage  = uint64(80)
	DefaultDiskUseReadonlyPercentage = uint64(90)
	DefaultMemUseWarningPercentage   = uint64(80)
	// TODO: off by default for now, to make sure
	//       the measurement is reliable. once
	//       confirmed, we can set this to 90
	DefaultMemUseReadonlyPercentage = uint64(0)
)
    65  
// Flags are the command-line input options of this package. An instance is
// registered through GetConfigOptionGroup and read back by LoadConfig, which
// falls back to DefaultConfigFile when ConfigFile is empty.
type Flags struct {
	ConfigFile string `long:"config-file" description:"path to config file (default: ./weaviate.conf.json)"`
}
    70  
// Config is the outline of the config file. The json/yaml tags define the
// keys accepted when a config file is parsed; LoadConfig additionally passes
// the struct to FromEnv after the (optional) file has been read.
//
// Only DefaultVectorizerModule and DefaultVectorDistanceMetric are checked by
// Config.Validate; nested sections provide their own Validate methods.
type Config struct {
	Name                                string                   `json:"name" yaml:"name"`
	Debug                               bool                     `json:"debug" yaml:"debug"`
	QueryDefaults                       QueryDefaults            `json:"query_defaults" yaml:"query_defaults"`
	QueryMaximumResults                 int64                    `json:"query_maximum_results" yaml:"query_maximum_results"`
	QueryNestedCrossReferenceLimit      int64                    `json:"query_nested_cross_reference_limit" yaml:"query_nested_cross_reference_limit"`
	Contextionary                       Contextionary            `json:"contextionary" yaml:"contextionary"`
	Authentication                      Authentication           `json:"authentication" yaml:"authentication"`
	Authorization                       Authorization            `json:"authorization" yaml:"authorization"`
	Origin                              string                   `json:"origin" yaml:"origin"`
	Persistence                         Persistence              `json:"persistence" yaml:"persistence"`
	DefaultVectorizerModule             string                   `json:"default_vectorizer_module" yaml:"default_vectorizer_module"`
	DefaultVectorDistanceMetric         string                   `json:"default_vector_distance_metric" yaml:"default_vector_distance_metric"`
	EnableModules                       string                   `json:"enable_modules" yaml:"enable_modules"`
	ModulesPath                         string                   `json:"modules_path" yaml:"modules_path"`
	ModuleHttpClientTimeout             time.Duration            `json:"modules_client_timeout" yaml:"modules_client_timeout"`
	AutoSchema                          AutoSchema               `json:"auto_schema" yaml:"auto_schema"`
	Cluster                             cluster.Config           `json:"cluster" yaml:"cluster"`
	Replication                         replication.GlobalConfig `json:"replication" yaml:"replication"`
	Monitoring                          Monitoring               `json:"monitoring" yaml:"monitoring"`
	GRPC                                GRPC                     `json:"grpc" yaml:"grpc"`
	Profiling                           Profiling                `json:"profiling" yaml:"profiling"`
	ResourceUsage                       ResourceUsage            `json:"resource_usage" yaml:"resource_usage"`
	MaxImportGoroutinesFactor           float64                  `json:"max_import_goroutine_factor" yaml:"max_import_goroutine_factor"`
	MaximumConcurrentGetRequests        int                      `json:"maximum_concurrent_get_requests" yaml:"maximum_concurrent_get_requests"`
	TrackVectorDimensions               bool                     `json:"track_vector_dimensions" yaml:"track_vector_dimensions"`
	ReindexVectorDimensionsAtStartup    bool                     `json:"reindex_vector_dimensions_at_startup" yaml:"reindex_vector_dimensions_at_startup"`
	DisableLazyLoadShards               bool                     `json:"disable_lazy_load_shards" yaml:"disable_lazy_load_shards"`
	RecountPropertiesAtStartup          bool                     `json:"recount_properties_at_startup" yaml:"recount_properties_at_startup"`
	ReindexSetToRoaringsetAtStartup     bool                     `json:"reindex_set_to_roaringset_at_startup" yaml:"reindex_set_to_roaringset_at_startup"`
	IndexMissingTextFilterableAtStartup bool                     `json:"index_missing_text_filterable_at_startup" yaml:"index_missing_text_filterable_at_startup"`
	DisableGraphQL                      bool                     `json:"disable_graphql" yaml:"disable_graphql"`
	AvoidMmap                           bool                     `json:"avoid_mmap" yaml:"avoid_mmap"`
	CORS                                CORS                     `json:"cors" yaml:"cors"`
	DisableTelemetry                    bool                     `json:"disable_telemetry" yaml:"disable_telemetry"`
}
   108  
// moduleProvider is the dependency Config.Validate needs in order to check
// that the configured default vectorizer module name is acceptable.
type moduleProvider interface {
	// ValidateVectorizer returns a non-nil error when moduleName is rejected.
	ValidateVectorizer(moduleName string) error
}
   112  
   113  // Validate the non-nested parameters. Nested objects must provide their own
   114  // validation methods
   115  func (c Config) Validate(modProv moduleProvider) error {
   116  	if err := c.validateDefaultVectorizerModule(modProv); err != nil {
   117  		return errors.Wrap(err, "default vectorizer module")
   118  	}
   119  
   120  	if err := c.validateDefaultVectorDistanceMetric(); err != nil {
   121  		return errors.Wrap(err, "default vector distance metric")
   122  	}
   123  
   124  	return nil
   125  }
   126  
   127  func (c Config) validateDefaultVectorizerModule(modProv moduleProvider) error {
   128  	if c.DefaultVectorizerModule == VectorizerModuleNone {
   129  		return nil
   130  	}
   131  
   132  	return modProv.ValidateVectorizer(c.DefaultVectorizerModule)
   133  }
   134  
   135  func (c Config) validateDefaultVectorDistanceMetric() error {
   136  	switch c.DefaultVectorDistanceMetric {
   137  	case "", common.DistanceCosine, common.DistanceDot, common.DistanceL2Squared, common.DistanceManhattan, common.DistanceHamming:
   138  		return nil
   139  	default:
   140  		return fmt.Errorf("must be one of [\"cosine\", \"dot\", \"l2-squared\", \"manhattan\",\"hamming\"]")
   141  	}
   142  }
   143  
   144  type AutoSchema struct {
   145  	Enabled       bool   `json:"enabled" yaml:"enabled"`
   146  	DefaultString string `json:"defaultString" yaml:"defaultString"`
   147  	DefaultNumber string `json:"defaultNumber" yaml:"defaultNumber"`
   148  	DefaultDate   string `json:"defaultDate" yaml:"defaultDate"`
   149  }
   150  
   151  func (a AutoSchema) Validate() error {
   152  	if a.DefaultNumber != "int" && a.DefaultNumber != "number" {
   153  		return fmt.Errorf("autoSchema.defaultNumber must be either 'int' or 'number")
   154  	}
   155  	if a.DefaultString != schema.DataTypeText.String() &&
   156  		a.DefaultString != schema.DataTypeString.String() {
   157  		return fmt.Errorf("autoSchema.defaultString must be either 'string' or 'text")
   158  	}
   159  	if a.DefaultDate != "date" &&
   160  		a.DefaultDate != schema.DataTypeText.String() &&
   161  		a.DefaultDate != schema.DataTypeString.String() {
   162  		return fmt.Errorf("autoSchema.defaultDate must be either 'date' or 'string' or 'text")
   163  	}
   164  
   165  	return nil
   166  }
   167  
// QueryDefaults holds defaults for optional query parameters. It is read from
// the "query_defaults" section of the config.
type QueryDefaults struct {
	// Limit is stored under the "limit" key.
	// NOTE(review): presumably the result limit applied when a query does
	// not specify one — confirm against the query handlers.
	Limit int64 `json:"limit" yaml:"limit"`
}

// DefaultQueryDefaultsLimit is the default query limit when no limit is provided
const DefaultQueryDefaultsLimit int64 = 10
   175  
// Contextionary is the "contextionary" section of the config; it carries a
// single URL setting.
type Contextionary struct {
	URL string `json:"url" yaml:"url"`
}
   179  
// Monitoring is the "monitoring" section of the config. Note that the Group
// field is stored under the key "group_classes", not "group".
type Monitoring struct {
	Enabled bool   `json:"enabled" yaml:"enabled"`
	Tool    string `json:"tool" yaml:"tool"`
	Port    int    `json:"port" yaml:"port"`
	Group   bool   `json:"group_classes" yaml:"group_classes"`
}
   186  
// GRPC is the "grpc" section of the config. Besides the port it carries its
// own certificate and key file paths, to support independent TLS credentials
// for gRPC.
type GRPC struct {
	Port     int    `json:"port" yaml:"port"`
	CertFile string `json:"certFile" yaml:"certFile"`
	KeyFile  string `json:"keyFile" yaml:"keyFile"`
}
   193  
// Profiling is the "profiling" section of the config.
// NOTE(review): the field names suggest these values are handed to
// runtime.SetBlockProfileRate and runtime.SetMutexProfileFraction — confirm
// where they are consumed.
type Profiling struct {
	BlockProfileRate     int `json:"blockProfileRate" yaml:"blockProfileRate"`
	MutexProfileFraction int `json:"mutexProfileFraction" yaml:"mutexProfileFraction"`
}
   198  
   199  type Persistence struct {
   200  	DataPath                          string `json:"dataPath" yaml:"dataPath"`
   201  	MemtablesFlushDirtyAfter          int    `json:"flushDirtyMemtablesAfter" yaml:"flushDirtyMemtablesAfter"`
   202  	MemtablesMaxSizeMB                int    `json:"memtablesMaxSizeMB" yaml:"memtablesMaxSizeMB"`
   203  	MemtablesMinActiveDurationSeconds int    `json:"memtablesMinActiveDurationSeconds" yaml:"memtablesMinActiveDurationSeconds"`
   204  	MemtablesMaxActiveDurationSeconds int    `json:"memtablesMaxActiveDurationSeconds" yaml:"memtablesMaxActiveDurationSeconds"`
   205  }
   206  
   207  // DefaultPersistenceDataPath is the default location for data directory when no location is provided
   208  const DefaultPersistenceDataPath string = "./data"
   209  
   210  func (p Persistence) Validate() error {
   211  	if p.DataPath == "" {
   212  		return fmt.Errorf("persistence.dataPath must be set")
   213  	}
   214  
   215  	return nil
   216  }
   217  
   218  type DiskUse struct {
   219  	WarningPercentage  uint64 `json:"warning_percentage" yaml:"warning_percentage"`
   220  	ReadOnlyPercentage uint64 `json:"readonly_percentage" yaml:"readonly_percentage"`
   221  }
   222  
   223  func (d DiskUse) Validate() error {
   224  	if d.WarningPercentage > 100 {
   225  		return fmt.Errorf("disk_use.read_only_percentage must be between 0 and 100")
   226  	}
   227  
   228  	if d.ReadOnlyPercentage > 100 {
   229  		return fmt.Errorf("disk_use.read_only_percentage must be between 0 and 100")
   230  	}
   231  
   232  	return nil
   233  }
   234  
   235  type MemUse struct {
   236  	WarningPercentage  uint64 `json:"warning_percentage" yaml:"warning_percentage"`
   237  	ReadOnlyPercentage uint64 `json:"readonly_percentage" yaml:"readonly_percentage"`
   238  }
   239  
   240  func (m MemUse) Validate() error {
   241  	if m.WarningPercentage > 100 {
   242  		return fmt.Errorf("mem_use.read_only_percentage must be between 0 and 100")
   243  	}
   244  
   245  	if m.ReadOnlyPercentage > 100 {
   246  		return fmt.Errorf("mem_use.read_only_percentage must be between 0 and 100")
   247  	}
   248  
   249  	return nil
   250  }
   251  
// ResourceUsage groups the disk and memory usage thresholds; it is stored
// under the "resource_usage" key of Config.
//
// NOTE(review): unlike the other config sections, the fields carry no
// json/yaml tags, so decoders fall back to their default field-name mapping —
// confirm this is intentional.
type ResourceUsage struct {
	DiskUse DiskUse
	MemUse  MemUse
}
   256  
// CORS is the "cors" section of the config. Each field is a single string.
type CORS struct {
	AllowOrigin  string `json:"allow_origin" yaml:"allow_origin"`
	AllowMethods string `json:"allow_methods" yaml:"allow_methods"`
	AllowHeaders string `json:"allow_headers" yaml:"allow_headers"`
}

// Default values for the CORS settings. The header default is a
// comma-separated list that, besides Content-Type, Authorization and Batch,
// names the API-key / base-URL headers of the supported module providers.
const (
	DefaultCORSAllowOrigin  = "*"
	DefaultCORSAllowMethods = "*"
	DefaultCORSAllowHeaders = "Content-Type, Authorization, Batch, X-Openai-Api-Key, X-Openai-Organization, X-Openai-Baseurl, X-Anyscale-Baseurl, X-Anyscale-Api-Key, X-Cohere-Api-Key, X-Cohere-Baseurl, X-Huggingface-Api-Key, X-Azure-Api-Key, X-Google-Api-Key, X-Palm-Api-Key, X-Jinaai-Api-Key, X-Aws-Access-Key, X-Aws-Secret-Key, X-Voyageai-Baseurl, X-Voyageai-Api-Key, X-Mistral-Baseurl, X-Mistral-Api-Key"
)
   268  
   269  func (r ResourceUsage) Validate() error {
   270  	if err := r.DiskUse.Validate(); err != nil {
   271  		return err
   272  	}
   273  
   274  	if err := r.MemUse.Validate(); err != nil {
   275  		return err
   276  	}
   277  
   278  	return nil
   279  }
   280  
   281  // GetConfigOptionGroup creates an option group for swagger
   282  func GetConfigOptionGroup() *swag.CommandLineOptionsGroup {
   283  	commandLineOptionsGroup := swag.CommandLineOptionsGroup{
   284  		ShortDescription: "Connector config & MQTT config",
   285  		LongDescription:  "",
   286  		Options:          &Flags{},
   287  	}
   288  
   289  	return &commandLineOptionsGroup
   290  }
   291  
// WeaviateConfig bundles the loaded Config with the Hostname and Scheme of
// the server. Config is filled in by LoadConfig; Hostname and Scheme are
// combined by GetHostAddress.
type WeaviateConfig struct {
	Config   Config
	Hostname string
	Scheme   string
}
   298  
   299  // GetHostAddress from config locations
   300  func (f *WeaviateConfig) GetHostAddress() string {
   301  	return fmt.Sprintf("%s://%s", f.Scheme, f.Hostname)
   302  }
   303  
   304  // LoadConfig from config locations
   305  func (f *WeaviateConfig) LoadConfig(flags *swag.CommandLineOptionsGroup, logger logrus.FieldLogger) error {
   306  	// Get command line flags
   307  	configFileName := flags.Options.(*Flags).ConfigFile
   308  	// Set default if not given
   309  	if configFileName == "" {
   310  		configFileName = DefaultConfigFile
   311  	}
   312  
   313  	// Read config file
   314  	file, err := os.ReadFile(configFileName)
   315  	_ = err // explicitly ignore
   316  
   317  	if len(file) > 0 {
   318  		logger.WithField("action", "config_load").WithField("config_file_path", configFileName).
   319  			Info("Usage of the weaviate.conf.json file is deprecated and will be removed in the future. Please use environment variables.")
   320  		config, err := f.parseConfigFile(file, configFileName)
   321  		if err != nil {
   322  			return configErr(err)
   323  		}
   324  		f.Config = config
   325  
   326  		deprecations.Log(logger, "config-files")
   327  	}
   328  
   329  	if err := FromEnv(&f.Config); err != nil {
   330  		return configErr(err)
   331  	}
   332  
   333  	if err := f.Config.Authentication.Validate(); err != nil {
   334  		return configErr(err)
   335  	}
   336  
   337  	if err := f.Config.Authorization.Validate(); err != nil {
   338  		return configErr(err)
   339  	}
   340  
   341  	if err := f.Config.Persistence.Validate(); err != nil {
   342  		return configErr(err)
   343  	}
   344  
   345  	if err := f.Config.AutoSchema.Validate(); err != nil {
   346  		return configErr(err)
   347  	}
   348  
   349  	if err := f.Config.ResourceUsage.Validate(); err != nil {
   350  		return configErr(err)
   351  	}
   352  
   353  	return nil
   354  }
   355  
   356  func (f *WeaviateConfig) parseConfigFile(file []byte, name string) (Config, error) {
   357  	var config Config
   358  
   359  	m := regexp.MustCompile(`.*\.(\w+)$`).FindStringSubmatch(name)
   360  	if len(m) < 2 {
   361  		return config, fmt.Errorf("config file does not have a file ending, got '%s'", name)
   362  	}
   363  
   364  	switch m[1] {
   365  	case "json":
   366  		err := json.Unmarshal(file, &config)
   367  		if err != nil {
   368  			return config, fmt.Errorf("error unmarshalling the json config file: %s", err)
   369  		}
   370  	case "yaml":
   371  		err := yaml.Unmarshal(file, &config)
   372  		if err != nil {
   373  			return config, fmt.Errorf("error unmarshalling the yaml config file: %s", err)
   374  		}
   375  	default:
   376  		return config, fmt.Errorf("unsupported config file extension '%s', use .yaml or .json", m[1])
   377  	}
   378  
   379  	return config, nil
   380  }
   381  
   382  func configErr(err error) error {
   383  	return fmt.Errorf("invalid config: %v", err)
   384  }