github.com/weaviate/weaviate@v1.24.6/usecases/schema/manager.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package schema 13 14 import ( 15 "context" 16 "fmt" 17 "sync" 18 19 enterrors "github.com/weaviate/weaviate/entities/errors" 20 21 "github.com/pkg/errors" 22 "github.com/sirupsen/logrus" 23 "github.com/weaviate/weaviate/entities/models" 24 "github.com/weaviate/weaviate/entities/replication" 25 "github.com/weaviate/weaviate/entities/schema" 26 "github.com/weaviate/weaviate/usecases/cluster" 27 "github.com/weaviate/weaviate/usecases/config" 28 "github.com/weaviate/weaviate/usecases/replica" 29 "github.com/weaviate/weaviate/usecases/scaler" 30 "github.com/weaviate/weaviate/usecases/schema/migrate" 31 "github.com/weaviate/weaviate/usecases/sharding" 32 ) 33 34 // Manager Manages schema changes at a use-case level, i.e. agnostic of 35 // underlying databases or storage providers 36 type Manager struct { 37 migrator migrate.Migrator 38 repo SchemaStore 39 callbacks []func(updatedSchema schema.Schema) 40 logger logrus.FieldLogger 41 Authorizer authorizer 42 config config.Config 43 vectorizerValidator VectorizerValidator 44 moduleConfig ModuleConfig 45 cluster *cluster.TxManager 46 clusterState clusterState 47 configParser VectorConfigParser 48 invertedConfigValidator InvertedConfigValidator 49 scaleOut scaleOut 50 RestoreStatus sync.Map 51 RestoreError sync.Map 52 sync.RWMutex 53 54 // As outlined in [*cluster.TxManager.TryResumeDanglingTxs] the current 55 // implementation isn't perfect. It does not actually know if a tx was meant 56 // to be committed or not. Instead we do a simple workaround. We check if the 57 // schema is out of sync and only then do we try to resume transactions. 58 shouldTryToResumeTx bool 59 60 schemaCache 61 } 62 63 type VectorConfigParser func(in interface{}, vectorIndexType string) (schema.VectorIndexConfig, error) 64 65 type InvertedConfigValidator func(in *models.InvertedIndexConfig) error 66 67 type SchemaGetter interface { 68 GetSchemaSkipAuth() schema.Schema 69 Nodes() []string 70 NodeName() string 71 ClusterHealthScore() int 72 ResolveParentNodes(string, string) (map[string]string, error) 73 74 CopyShardingState(class string) *sharding.State 75 ShardOwner(class, shard string) (string, error) 76 TenantShard(class, tenant string) (string, string) 77 ShardFromUUID(class string, uuid []byte) string 78 ShardReplicas(class, shard string) ([]string, error) 79 } 80 81 type VectorizerValidator interface { 82 ValidateVectorizer(moduleName string) error 83 } 84 85 type ModuleConfig interface { 86 SetClassDefaults(class *models.Class) 87 SetSinglePropertyDefaults(class *models.Class, prop *models.Property) 88 ValidateClass(ctx context.Context, class *models.Class) error 89 } 90 91 // SchemaStore is responsible for persisting the schema 92 // by providing support for both partial and complete schema updates 93 type SchemaStore interface { 94 // Save saves the complete schema to the persistent storage 95 Save(ctx context.Context, schema State) error 96 97 // Load loads the complete schema from the persistent storage 98 Load(context.Context) (State, error) 99 100 // NewClass creates a new class if it doesn't exists, otherwise return an error 101 NewClass(context.Context, ClassPayload) error 102 103 // UpdateClass if it exists, otherwise return an error 104 UpdateClass(context.Context, ClassPayload) error 105 106 // DeleteClass deletes class 107 DeleteClass(ctx context.Context, class string) error 108 109 // NewShards creates new shards of an existing class 110 NewShards(ctx context.Context, class string, shards []KeyValuePair) error 111 112 // UpdateShards updates (replaces) shards of on existing class 113 // Error is returned if class or shard does not exist 114 UpdateShards(ctx context.Context, class string, shards []KeyValuePair) error 115 116 // DeleteShards deletes shards from a class 117 // If the class or a shard does not exist then nothing is done and a nil error is returned 118 DeleteShards(ctx context.Context, class string, shards []string) error 119 } 120 121 // KeyValuePair is used to serialize shards updates 122 type KeyValuePair struct { 123 Key string 124 Value []byte 125 } 126 127 // ClassPayload is used to serialize class updates 128 type ClassPayload struct { 129 Name string 130 Metadata []byte 131 ShardingState []byte 132 Shards []KeyValuePair 133 ReplaceShards bool 134 Error error 135 } 136 137 type clusterState interface { 138 // Hostnames initializes a broadcast 139 Hostnames() []string 140 141 // AllNames initializes shard distribution across nodes 142 AllNames() []string 143 Candidates() []string 144 LocalName() string 145 NodeCount() int 146 NodeHostname(nodeName string) (string, bool) 147 148 // ClusterHealthScore gets the whole cluster health, the lower number the better 149 ClusterHealthScore() int 150 151 SchemaSyncIgnored() bool 152 SkipSchemaRepair() bool 153 } 154 155 type scaleOut interface { 156 SetSchemaManager(sm scaler.SchemaManager) 157 Scale(ctx context.Context, className string, 158 updated sharding.Config, prevReplFactor, newReplFactor int64) (*sharding.State, error) 159 } 160 161 // NewManager creates a new manager 162 func NewManager(migrator migrate.Migrator, repo SchemaStore, 163 logger logrus.FieldLogger, authorizer authorizer, config config.Config, 164 configParser VectorConfigParser, vectorizerValidator VectorizerValidator, 165 invertedConfigValidator InvertedConfigValidator, 166 moduleConfig ModuleConfig, clusterState clusterState, 167 txClient cluster.Client, txPersistence cluster.Persistence, 168 scaleoutManager scaleOut, 169 ) (*Manager, error) { 170 txBroadcaster := cluster.NewTxBroadcaster(clusterState, txClient, logger) 171 m := &Manager{ 172 config: config, 173 migrator: migrator, 174 repo: repo, 175 schemaCache: schemaCache{State: State{}}, 176 logger: logger, 177 Authorizer: authorizer, 178 configParser: configParser, 179 vectorizerValidator: vectorizerValidator, 180 invertedConfigValidator: invertedConfigValidator, 181 moduleConfig: moduleConfig, 182 cluster: cluster.NewTxManager(txBroadcaster, txPersistence, logger), 183 clusterState: clusterState, 184 scaleOut: scaleoutManager, 185 } 186 187 m.scaleOut.SetSchemaManager(m) 188 189 m.cluster.SetCommitFn(m.handleCommit) 190 m.cluster.SetResponseFn(m.handleTxResponse) 191 m.cluster.SetAllowUnready(allowUnreadyTxs) 192 txBroadcaster.SetConsensusFunction(newReadConsensus(m.parseConfigs, m.logger)) 193 194 err := m.loadOrInitializeSchema(context.Background()) 195 if err != nil { 196 return nil, fmt.Errorf("could not load or initialize schema: %v", err) 197 } 198 199 return m, nil 200 } 201 202 func (m *Manager) Shutdown(ctx context.Context) error { 203 allCommitsDone := make(chan struct{}) 204 enterrors.GoWrapper(func() { 205 m.cluster.Shutdown() 206 allCommitsDone <- struct{}{} 207 }, m.logger) 208 209 select { 210 case <-ctx.Done(): 211 return fmt.Errorf("waiting for transactions to commit: %w", ctx.Err()) 212 case <-allCommitsDone: 213 return nil 214 } 215 } 216 217 func (m *Manager) TxManager() *cluster.TxManager { 218 return m.cluster 219 } 220 221 type authorizer interface { 222 Authorize(principal *models.Principal, verb, resource string) error 223 } 224 225 func (m *Manager) saveSchema(ctx context.Context, st State) error { 226 m.logger. 227 WithField("action", "schema.save"). 228 Debug("saving updated schema to configuration store") 229 230 if err := m.repo.Save(ctx, st); err != nil { 231 return err 232 } 233 m.triggerSchemaUpdateCallbacks() 234 return nil 235 } 236 237 // RegisterSchemaUpdateCallback allows other usecases to register a primitive 238 // type update callback. The callbacks will be called any time we persist a 239 // schema update 240 func (m *Manager) RegisterSchemaUpdateCallback(callback func(updatedSchema schema.Schema)) { 241 m.callbacks = append(m.callbacks, callback) 242 } 243 244 func (m *Manager) triggerSchemaUpdateCallbacks() { 245 schema := m.getSchema() 246 247 for _, cb := range m.callbacks { 248 cb(schema) 249 } 250 } 251 252 func (m *Manager) loadOrInitializeSchema(ctx context.Context) error { 253 localSchema, err := m.repo.Load(ctx) 254 if err != nil { 255 return fmt.Errorf("could not load schema: %v", err) 256 } 257 if err := m.parseConfigs(ctx, &localSchema); err != nil { 258 return errors.Wrap(err, "load schema") 259 } 260 261 if err := m.migrateSchemaIfNecessary(ctx, &localSchema); err != nil { 262 return fmt.Errorf("migrate schema: %w", err) 263 } 264 265 // There was a bug that allowed adding the same prop multiple times. This 266 // leads to a race at startup. If an instance is already affected by this, 267 // this step can remove the duplicate ones. 268 // 269 // See https://github.com/weaviate/weaviate/issues/2609 270 for _, c := range localSchema.ObjectSchema.Classes { 271 c.Properties = m.deduplicateProps(c.Properties, c.Class) 272 } 273 274 // set internal state since it is used by startupClusterSync 275 m.schemaCache.setState(localSchema) 276 277 // make sure that all migrations have completed before checking sync, 278 // otherwise two identical schemas might fail the check based on form rather 279 // than content 280 281 if err := m.startupClusterSync(ctx); err != nil { 282 return errors.Wrap(err, "sync schema with other nodes in the cluster") 283 } 284 285 // store in persistent storage 286 // TODO: investigate if save() is redundant because it is called in startupClusterSync() 287 err = m.RLockGuard(func() error { return m.repo.Save(ctx, m.schemaCache.State) }) 288 if err != nil { 289 return fmt.Errorf("store to persistent storage: %v", err) 290 } 291 292 return nil 293 } 294 295 // StartServing indicates that the schema manager is ready to accept incoming 296 // connections in cluster mode, i.e. it will accept opening transactions. 297 // 298 // Some transactions are exempt, such as ReadSchema which is required for nodes 299 // to start up. 300 // 301 // This method should be called when all backends, primarily the DB, are ready 302 // to serve. 303 func (m *Manager) StartServing(ctx context.Context) error { 304 if err := m.resumeDanglingTransactions(ctx); err != nil { 305 return err 306 } 307 308 // only start accepting incoming connections when dangling txs have been 309 // resumed, otherwise there is potential for conflict 310 m.cluster.StartAcceptIncoming() 311 312 return nil 313 } 314 315 // resumeDanglingTransactions iterates over any transaction that may have been left 316 // dangling after a restart and retries to commit them if appropriate. 317 // 318 // This can only be called when all areas responding to side effects of 319 // committing a transaction are ready. In practice this means, the DB must be 320 // ready to try and call this method. 321 func (m *Manager) resumeDanglingTransactions(ctx context.Context) error { 322 var shouldResume bool 323 m.RLockGuard(func() error { 324 shouldResume = m.shouldTryToResumeTx 325 return nil 326 }) 327 328 if !shouldResume { 329 // nothing to do for us 330 return nil 331 } 332 333 ok, err := m.cluster.TryResumeDanglingTxs(ctx, resumableTxs) 334 if err != nil { 335 return fmt.Errorf("try resuming dangling transactions: %w", err) 336 } 337 338 if !ok { 339 // no tx was applied, we are done 340 return nil 341 } 342 343 // a tx was applied which means the previous schema check was skipped, we 344 // now need to check the schema again 345 err = m.validateSchemaCorruption(ctx) 346 if err == nil { 347 // all is fine, continue as normal 348 return nil 349 } 350 351 if m.clusterState.SchemaSyncIgnored() { 352 m.logger.WithError(err).WithFields(logrusStartupSyncFields()). 353 Warning("schema out of sync, but ignored because " + 354 "CLUSTER_IGNORE_SCHEMA_SYNC=true") 355 return nil 356 } 357 358 return fmt.Errorf( 359 "applied dangling tx, but schema still out of sync: %w", err) 360 } 361 362 func (m *Manager) migrateSchemaIfNecessary(ctx context.Context, localSchema *State) error { 363 // introduced when Weaviate started supporting multi-shards per class in v1.8 364 if err := m.checkSingleShardMigration(ctx, localSchema); err != nil { 365 return errors.Wrap(err, "migrating sharding state from previous version") 366 } 367 368 // introduced when Weaviate started supporting replication in v1.17 369 if err := m.checkShardingStateForReplication(ctx, localSchema); err != nil { 370 return errors.Wrap(err, "migrating sharding state from previous version (before replication)") 371 } 372 373 // if other migrations become necessary in the future, you can add them here. 374 return nil 375 } 376 377 func (m *Manager) checkSingleShardMigration(ctx context.Context, localSchema *State) error { 378 for _, c := range localSchema.ObjectSchema.Classes { 379 if _, ok := localSchema.ShardingState[c.Class]; ok { // there is sharding state for this class. Nothing to do 380 continue 381 } 382 383 m.logger.WithField("className", c.Class).WithField("action", "initialize_schema"). 384 Warningf("No sharding state found for class %q, initializing new state. "+ 385 "This is expected behavior if the schema was created with an older Weaviate "+ 386 "version, prior to supporting multi-shard indices.", c.Class) 387 388 // there is no sharding state for this class, let's create the correct 389 // config. This class must have been created prior to the sharding feature, 390 // so we now that the shardCount==1 - we do not care about any of the other 391 // parameters and simply use the defaults for those 392 c.ShardingConfig = map[string]interface{}{ 393 "desiredCount": 1, 394 } 395 if err := m.parseShardingConfig(ctx, c); err != nil { 396 return err 397 } 398 399 if err := replica.ValidateConfig(c, m.config.Replication); err != nil { 400 return fmt.Errorf("validate replication config: %w", err) 401 } 402 shardState, err := sharding.InitState(c.Class, 403 c.ShardingConfig.(sharding.Config), 404 m.clusterState, c.ReplicationConfig.Factor, 405 schema.MultiTenancyEnabled(c)) 406 if err != nil { 407 return errors.Wrap(err, "init sharding state") 408 } 409 410 if localSchema.ShardingState == nil { 411 localSchema.ShardingState = map[string]*sharding.State{} 412 } 413 localSchema.ShardingState[c.Class] = shardState 414 415 } 416 417 return nil 418 } 419 420 func (m *Manager) checkShardingStateForReplication(ctx context.Context, localSchema *State) error { 421 for _, classState := range localSchema.ShardingState { 422 classState.MigrateFromOldFormat() 423 } 424 return nil 425 } 426 427 func newSchema() *State { 428 return &State{ 429 ObjectSchema: &models.Schema{ 430 Classes: []*models.Class{}, 431 }, 432 ShardingState: map[string]*sharding.State{}, 433 } 434 } 435 436 func (m *Manager) parseConfigs(ctx context.Context, schema *State) error { 437 for _, class := range schema.ObjectSchema.Classes { 438 for _, prop := range class.Properties { 439 setPropertyDefaults(prop) 440 migratePropertySettings(prop) 441 } 442 443 if err := m.parseVectorIndexConfig(ctx, class); err != nil { 444 return errors.Wrapf(err, "class %s: vector index config", class.Class) 445 } 446 447 if err := m.parseShardingConfig(ctx, class); err != nil { 448 return errors.Wrapf(err, "class %s: sharding config", class.Class) 449 } 450 451 // Pass dummy replication config with minimum factor 1. Otherwise the 452 // setting is not backward-compatible. The user may have created a class 453 // with factor=1 before the change was introduced. Now their setup would no 454 // longer start up if the required minimum is now higher than 1. We want 455 // the required minimum to only apply to newly created classes - not block 456 // loading existing ones. 457 if err := replica.ValidateConfig(class, replication.GlobalConfig{MinimumFactor: 1}); err != nil { 458 return fmt.Errorf("replication config: %w", err) 459 } 460 } 461 m.schemaCache.LockGuard(func() { 462 for _, shardState := range schema.ShardingState { 463 shardState.SetLocalName(m.clusterState.LocalName()) 464 } 465 }) 466 467 return nil 468 }