github.com/weaviate/weaviate@v1.24.6/usecases/config/config_handler.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package config 13 14 import ( 15 "encoding/json" 16 "fmt" 17 "os" 18 "regexp" 19 "time" 20 21 "github.com/go-openapi/swag" 22 "github.com/pkg/errors" 23 "github.com/sirupsen/logrus" 24 "github.com/weaviate/weaviate/deprecations" 25 "github.com/weaviate/weaviate/entities/replication" 26 "github.com/weaviate/weaviate/entities/schema" 27 "github.com/weaviate/weaviate/entities/vectorindex/common" 28 "github.com/weaviate/weaviate/usecases/cluster" 29 "gopkg.in/yaml.v2" 30 ) 31 32 // ServerVersion is set when the misc handlers are setup. 33 // When misc handlers are setup, the entire swagger spec 34 // is already being parsed for the server version. This is 35 // a good time for us to set ServerVersion, so that the 36 // spec only needs to be parsed once. 37 var ServerVersion string 38 39 // GitHash keeps the current git hash commit information 40 var GitHash = "unknown" 41 42 // DefaultConfigFile is the default file when no config file is provided 43 const DefaultConfigFile string = "./weaviate.conf.json" 44 45 // DefaultCleanupIntervalSeconds can be overwritten on a per-class basis 46 const DefaultCleanupIntervalSeconds = int64(60) 47 48 const ( 49 // These BM25 tuning params can be overwritten on a per-class basis 50 DefaultBM25k1 = float32(1.2) 51 DefaultBM25b = float32(0.75) 52 ) 53 54 const ( 55 DefaultMaxImportGoroutinesFactor = float64(1.5) 56 57 DefaultDiskUseWarningPercentage = uint64(80) 58 DefaultDiskUseReadonlyPercentage = uint64(90) 59 DefaultMemUseWarningPercentage = uint64(80) 60 // TODO: off by default for now, to make sure 61 // the measurement is reliable. once 62 // confirmed, we can set this to 90 63 DefaultMemUseReadonlyPercentage = uint64(0) 64 ) 65 66 // Flags are input options 67 type Flags struct { 68 ConfigFile string `long:"config-file" description:"path to config file (default: ./weaviate.conf.json)"` 69 } 70 71 // Config outline of the config file 72 type Config struct { 73 Name string `json:"name" yaml:"name"` 74 Debug bool `json:"debug" yaml:"debug"` 75 QueryDefaults QueryDefaults `json:"query_defaults" yaml:"query_defaults"` 76 QueryMaximumResults int64 `json:"query_maximum_results" yaml:"query_maximum_results"` 77 QueryNestedCrossReferenceLimit int64 `json:"query_nested_cross_reference_limit" yaml:"query_nested_cross_reference_limit"` 78 Contextionary Contextionary `json:"contextionary" yaml:"contextionary"` 79 Authentication Authentication `json:"authentication" yaml:"authentication"` 80 Authorization Authorization `json:"authorization" yaml:"authorization"` 81 Origin string `json:"origin" yaml:"origin"` 82 Persistence Persistence `json:"persistence" yaml:"persistence"` 83 DefaultVectorizerModule string `json:"default_vectorizer_module" yaml:"default_vectorizer_module"` 84 DefaultVectorDistanceMetric string `json:"default_vector_distance_metric" yaml:"default_vector_distance_metric"` 85 EnableModules string `json:"enable_modules" yaml:"enable_modules"` 86 ModulesPath string `json:"modules_path" yaml:"modules_path"` 87 ModuleHttpClientTimeout time.Duration `json:"modules_client_timeout" yaml:"modules_client_timeout"` 88 AutoSchema AutoSchema `json:"auto_schema" yaml:"auto_schema"` 89 Cluster cluster.Config `json:"cluster" yaml:"cluster"` 90 Replication replication.GlobalConfig `json:"replication" yaml:"replication"` 91 Monitoring Monitoring `json:"monitoring" yaml:"monitoring"` 92 GRPC GRPC `json:"grpc" yaml:"grpc"` 93 Profiling Profiling `json:"profiling" yaml:"profiling"` 94 ResourceUsage ResourceUsage `json:"resource_usage" yaml:"resource_usage"` 95 MaxImportGoroutinesFactor float64 `json:"max_import_goroutine_factor" yaml:"max_import_goroutine_factor"` 96 MaximumConcurrentGetRequests int `json:"maximum_concurrent_get_requests" yaml:"maximum_concurrent_get_requests"` 97 TrackVectorDimensions bool `json:"track_vector_dimensions" yaml:"track_vector_dimensions"` 98 ReindexVectorDimensionsAtStartup bool `json:"reindex_vector_dimensions_at_startup" yaml:"reindex_vector_dimensions_at_startup"` 99 DisableLazyLoadShards bool `json:"disable_lazy_load_shards" yaml:"disable_lazy_load_shards"` 100 RecountPropertiesAtStartup bool `json:"recount_properties_at_startup" yaml:"recount_properties_at_startup"` 101 ReindexSetToRoaringsetAtStartup bool `json:"reindex_set_to_roaringset_at_startup" yaml:"reindex_set_to_roaringset_at_startup"` 102 IndexMissingTextFilterableAtStartup bool `json:"index_missing_text_filterable_at_startup" yaml:"index_missing_text_filterable_at_startup"` 103 DisableGraphQL bool `json:"disable_graphql" yaml:"disable_graphql"` 104 AvoidMmap bool `json:"avoid_mmap" yaml:"avoid_mmap"` 105 CORS CORS `json:"cors" yaml:"cors"` 106 DisableTelemetry bool `json:"disable_telemetry" yaml:"disable_telemetry"` 107 } 108 109 type moduleProvider interface { 110 ValidateVectorizer(moduleName string) error 111 } 112 113 // Validate the non-nested parameters. Nested objects must provide their own 114 // validation methods 115 func (c Config) Validate(modProv moduleProvider) error { 116 if err := c.validateDefaultVectorizerModule(modProv); err != nil { 117 return errors.Wrap(err, "default vectorizer module") 118 } 119 120 if err := c.validateDefaultVectorDistanceMetric(); err != nil { 121 return errors.Wrap(err, "default vector distance metric") 122 } 123 124 return nil 125 } 126 127 func (c Config) validateDefaultVectorizerModule(modProv moduleProvider) error { 128 if c.DefaultVectorizerModule == VectorizerModuleNone { 129 return nil 130 } 131 132 return modProv.ValidateVectorizer(c.DefaultVectorizerModule) 133 } 134 135 func (c Config) validateDefaultVectorDistanceMetric() error { 136 switch c.DefaultVectorDistanceMetric { 137 case "", common.DistanceCosine, common.DistanceDot, common.DistanceL2Squared, common.DistanceManhattan, common.DistanceHamming: 138 return nil 139 default: 140 return fmt.Errorf("must be one of [\"cosine\", \"dot\", \"l2-squared\", \"manhattan\",\"hamming\"]") 141 } 142 } 143 144 type AutoSchema struct { 145 Enabled bool `json:"enabled" yaml:"enabled"` 146 DefaultString string `json:"defaultString" yaml:"defaultString"` 147 DefaultNumber string `json:"defaultNumber" yaml:"defaultNumber"` 148 DefaultDate string `json:"defaultDate" yaml:"defaultDate"` 149 } 150 151 func (a AutoSchema) Validate() error { 152 if a.DefaultNumber != "int" && a.DefaultNumber != "number" { 153 return fmt.Errorf("autoSchema.defaultNumber must be either 'int' or 'number") 154 } 155 if a.DefaultString != schema.DataTypeText.String() && 156 a.DefaultString != schema.DataTypeString.String() { 157 return fmt.Errorf("autoSchema.defaultString must be either 'string' or 'text") 158 } 159 if a.DefaultDate != "date" && 160 a.DefaultDate != schema.DataTypeText.String() && 161 a.DefaultDate != schema.DataTypeString.String() { 162 return fmt.Errorf("autoSchema.defaultDate must be either 'date' or 'string' or 'text") 163 } 164 165 return nil 166 } 167 168 // QueryDefaults for optional parameters 169 type QueryDefaults struct { 170 Limit int64 `json:"limit" yaml:"limit"` 171 } 172 173 // DefaultQueryDefaultsLimit is the default query limit when no limit is provided 174 const DefaultQueryDefaultsLimit int64 = 10 175 176 type Contextionary struct { 177 URL string `json:"url" yaml:"url"` 178 } 179 180 type Monitoring struct { 181 Enabled bool `json:"enabled" yaml:"enabled"` 182 Tool string `json:"tool" yaml:"tool"` 183 Port int `json:"port" yaml:"port"` 184 Group bool `json:"group_classes" yaml:"group_classes"` 185 } 186 187 // Support independent TLS credentials for gRPC 188 type GRPC struct { 189 Port int `json:"port" yaml:"port"` 190 CertFile string `json:"certFile" yaml:"certFile"` 191 KeyFile string `json:"keyFile" yaml:"keyFile"` 192 } 193 194 type Profiling struct { 195 BlockProfileRate int `json:"blockProfileRate" yaml:"blockProfileRate"` 196 MutexProfileFraction int `json:"mutexProfileFraction" yaml:"mutexProfileFraction"` 197 } 198 199 type Persistence struct { 200 DataPath string `json:"dataPath" yaml:"dataPath"` 201 MemtablesFlushDirtyAfter int `json:"flushDirtyMemtablesAfter" yaml:"flushDirtyMemtablesAfter"` 202 MemtablesMaxSizeMB int `json:"memtablesMaxSizeMB" yaml:"memtablesMaxSizeMB"` 203 MemtablesMinActiveDurationSeconds int `json:"memtablesMinActiveDurationSeconds" yaml:"memtablesMinActiveDurationSeconds"` 204 MemtablesMaxActiveDurationSeconds int `json:"memtablesMaxActiveDurationSeconds" yaml:"memtablesMaxActiveDurationSeconds"` 205 } 206 207 // DefaultPersistenceDataPath is the default location for data directory when no location is provided 208 const DefaultPersistenceDataPath string = "./data" 209 210 func (p Persistence) Validate() error { 211 if p.DataPath == "" { 212 return fmt.Errorf("persistence.dataPath must be set") 213 } 214 215 return nil 216 } 217 218 type DiskUse struct { 219 WarningPercentage uint64 `json:"warning_percentage" yaml:"warning_percentage"` 220 ReadOnlyPercentage uint64 `json:"readonly_percentage" yaml:"readonly_percentage"` 221 } 222 223 func (d DiskUse) Validate() error { 224 if d.WarningPercentage > 100 { 225 return fmt.Errorf("disk_use.read_only_percentage must be between 0 and 100") 226 } 227 228 if d.ReadOnlyPercentage > 100 { 229 return fmt.Errorf("disk_use.read_only_percentage must be between 0 and 100") 230 } 231 232 return nil 233 } 234 235 type MemUse struct { 236 WarningPercentage uint64 `json:"warning_percentage" yaml:"warning_percentage"` 237 ReadOnlyPercentage uint64 `json:"readonly_percentage" yaml:"readonly_percentage"` 238 } 239 240 func (m MemUse) Validate() error { 241 if m.WarningPercentage > 100 { 242 return fmt.Errorf("mem_use.read_only_percentage must be between 0 and 100") 243 } 244 245 if m.ReadOnlyPercentage > 100 { 246 return fmt.Errorf("mem_use.read_only_percentage must be between 0 and 100") 247 } 248 249 return nil 250 } 251 252 type ResourceUsage struct { 253 DiskUse DiskUse 254 MemUse MemUse 255 } 256 257 type CORS struct { 258 AllowOrigin string `json:"allow_origin" yaml:"allow_origin"` 259 AllowMethods string `json:"allow_methods" yaml:"allow_methods"` 260 AllowHeaders string `json:"allow_headers" yaml:"allow_headers"` 261 } 262 263 const ( 264 DefaultCORSAllowOrigin = "*" 265 DefaultCORSAllowMethods = "*" 266 DefaultCORSAllowHeaders = "Content-Type, Authorization, Batch, X-Openai-Api-Key, X-Openai-Organization, X-Openai-Baseurl, X-Anyscale-Baseurl, X-Anyscale-Api-Key, X-Cohere-Api-Key, X-Cohere-Baseurl, X-Huggingface-Api-Key, X-Azure-Api-Key, X-Google-Api-Key, X-Palm-Api-Key, X-Jinaai-Api-Key, X-Aws-Access-Key, X-Aws-Secret-Key, X-Voyageai-Baseurl, X-Voyageai-Api-Key, X-Mistral-Baseurl, X-Mistral-Api-Key" 267 ) 268 269 func (r ResourceUsage) Validate() error { 270 if err := r.DiskUse.Validate(); err != nil { 271 return err 272 } 273 274 if err := r.MemUse.Validate(); err != nil { 275 return err 276 } 277 278 return nil 279 } 280 281 // GetConfigOptionGroup creates an option group for swagger 282 func GetConfigOptionGroup() *swag.CommandLineOptionsGroup { 283 commandLineOptionsGroup := swag.CommandLineOptionsGroup{ 284 ShortDescription: "Connector config & MQTT config", 285 LongDescription: "", 286 Options: &Flags{}, 287 } 288 289 return &commandLineOptionsGroup 290 } 291 292 // WeaviateConfig represents the used schema's 293 type WeaviateConfig struct { 294 Config Config 295 Hostname string 296 Scheme string 297 } 298 299 // GetHostAddress from config locations 300 func (f *WeaviateConfig) GetHostAddress() string { 301 return fmt.Sprintf("%s://%s", f.Scheme, f.Hostname) 302 } 303 304 // LoadConfig from config locations 305 func (f *WeaviateConfig) LoadConfig(flags *swag.CommandLineOptionsGroup, logger logrus.FieldLogger) error { 306 // Get command line flags 307 configFileName := flags.Options.(*Flags).ConfigFile 308 // Set default if not given 309 if configFileName == "" { 310 configFileName = DefaultConfigFile 311 } 312 313 // Read config file 314 file, err := os.ReadFile(configFileName) 315 _ = err // explicitly ignore 316 317 if len(file) > 0 { 318 logger.WithField("action", "config_load").WithField("config_file_path", configFileName). 319 Info("Usage of the weaviate.conf.json file is deprecated and will be removed in the future. Please use environment variables.") 320 config, err := f.parseConfigFile(file, configFileName) 321 if err != nil { 322 return configErr(err) 323 } 324 f.Config = config 325 326 deprecations.Log(logger, "config-files") 327 } 328 329 if err := FromEnv(&f.Config); err != nil { 330 return configErr(err) 331 } 332 333 if err := f.Config.Authentication.Validate(); err != nil { 334 return configErr(err) 335 } 336 337 if err := f.Config.Authorization.Validate(); err != nil { 338 return configErr(err) 339 } 340 341 if err := f.Config.Persistence.Validate(); err != nil { 342 return configErr(err) 343 } 344 345 if err := f.Config.AutoSchema.Validate(); err != nil { 346 return configErr(err) 347 } 348 349 if err := f.Config.ResourceUsage.Validate(); err != nil { 350 return configErr(err) 351 } 352 353 return nil 354 } 355 356 func (f *WeaviateConfig) parseConfigFile(file []byte, name string) (Config, error) { 357 var config Config 358 359 m := regexp.MustCompile(`.*\.(\w+)$`).FindStringSubmatch(name) 360 if len(m) < 2 { 361 return config, fmt.Errorf("config file does not have a file ending, got '%s'", name) 362 } 363 364 switch m[1] { 365 case "json": 366 err := json.Unmarshal(file, &config) 367 if err != nil { 368 return config, fmt.Errorf("error unmarshalling the json config file: %s", err) 369 } 370 case "yaml": 371 err := yaml.Unmarshal(file, &config) 372 if err != nil { 373 return config, fmt.Errorf("error unmarshalling the yaml config file: %s", err) 374 } 375 default: 376 return config, fmt.Errorf("unsupported config file extension '%s', use .yaml or .json", m[1]) 377 } 378 379 return config, nil 380 } 381 382 func configErr(err error) error { 383 return fmt.Errorf("invalid config: %v", err) 384 }