github.com/weaviate/weaviate@v1.24.6/usecases/schema/startup_cluster_sync_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package schema
    13  
    14  import (
    15  	"context"
    16  	"encoding/json"
    17  	"fmt"
    18  	"testing"
    19  
    20  	testlog "github.com/sirupsen/logrus/hooks/test"
    21  	"github.com/stretchr/testify/assert"
    22  	"github.com/stretchr/testify/require"
    23  	"github.com/weaviate/weaviate/entities/models"
    24  	"github.com/weaviate/weaviate/entities/replication"
    25  	"github.com/weaviate/weaviate/usecases/cluster"
    26  	"github.com/weaviate/weaviate/usecases/config"
    27  	"github.com/weaviate/weaviate/usecases/sharding"
    28  )
    29  
    30  func TestStartupSync(t *testing.T) {
    31  	t.Run("new node joining, other nodes have schema", func(t *testing.T) {
    32  		clusterState := &fakeClusterState{
    33  			hosts: []string{"node1", "node2"},
    34  		}
    35  
    36  		txJSON, _ := json.Marshal(ReadSchemaPayload{
    37  			Schema: &State{
    38  				ObjectSchema: &models.Schema{
    39  					Classes: []*models.Class{
    40  						{
    41  							Class:           "Bongourno",
    42  							VectorIndexType: "hnsw",
    43  						},
    44  					},
    45  				},
    46  			},
    47  		})
    48  
    49  		txClient := &fakeTxClient{
    50  			openInjectPayload: json.RawMessage(txJSON),
    51  		}
    52  
    53  		sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, nil)
    54  		require.Nil(t, err)
    55  
    56  		localSchema := sm.GetSchemaSkipAuth()
    57  		assert.Equal(t, "Bongourno", localSchema.FindClassByName("Bongourno").Class)
    58  
    59  		st, _ := sm.ClusterStatus(context.Background())
    60  		assert.False(t, st.IgnoreSchemaSync, "sync is indicated as not skipped")
    61  		assert.True(t, st.Healthy, "cluster is deemed healthy")
    62  		assert.Len(t, st.Error, 0, "no error is shown")
    63  	})
    64  
    65  	t.Run("new node joining, other nodes have no schema", func(t *testing.T) {
    66  		clusterState := &fakeClusterState{
    67  			hosts: []string{"node1", "node2"},
    68  		}
    69  
    70  		txJSON, _ := json.Marshal(ReadSchemaPayload{
    71  			Schema: &State{
    72  				ObjectSchema: &models.Schema{
    73  					Classes: []*models.Class{},
    74  				},
    75  			},
    76  		})
    77  
    78  		txClient := &fakeTxClient{
    79  			openInjectPayload: json.RawMessage(txJSON),
    80  		}
    81  
    82  		sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, nil)
    83  		require.Nil(t, err)
    84  
    85  		localSchema := sm.GetSchemaSkipAuth()
    86  		assert.Len(t, localSchema.Objects.Classes, 0)
    87  
    88  		st, _ := sm.ClusterStatus(context.Background())
    89  		assert.False(t, st.IgnoreSchemaSync, "sync is indicated as not skipped")
    90  		assert.True(t, st.Healthy, "cluster is deemed healthy")
    91  		assert.Len(t, st.Error, 0, "no error is shown")
    92  	})
    93  
    94  	t.Run("new node joining, conflict in schema between nodes", func(t *testing.T) {
    95  		clusterState := &fakeClusterState{
    96  			hosts:      []string{"node1", "node2"},
    97  			skipRepair: true,
    98  		}
    99  
   100  		txJSON, _ := json.Marshal(ReadSchemaPayload{
   101  			Schema: &State{
   102  				ObjectSchema: &models.Schema{
   103  					Classes: []*models.Class{
   104  						{
   105  							Class:           "Bongourno",
   106  							VectorIndexType: "hnsw",
   107  						},
   108  					},
   109  				},
   110  			},
   111  		})
   112  
   113  		txClient := &fakeTxClient{
   114  			openInjectPayload: json.RawMessage(txJSON),
   115  		}
   116  
   117  		_, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{
   118  			ObjectSchema: &models.Schema{
   119  				Classes: []*models.Class{
   120  					{
   121  						Class:           "Hola",
   122  						VectorIndexType: "hnsw",
   123  					},
   124  				},
   125  			},
   126  		})
   127  		require.NotNil(t, err)
   128  		assert.Contains(t, err.Error(), "corrupt")
   129  	})
   130  
   131  	t.Run("conflict, but schema repaired", func(t *testing.T) {
   132  		clusterState := &fakeClusterState{
   133  			hosts: []string{"node1", "node2"},
   134  		}
   135  
   136  		txJSON, _ := json.Marshal(ReadSchemaPayload{
   137  			Schema: &State{
   138  				ObjectSchema: &models.Schema{
   139  					Classes: []*models.Class{
   140  						{
   141  							Class:           "Bongourno",
   142  							VectorIndexType: "hnsw",
   143  						},
   144  					},
   145  				},
   146  			},
   147  		})
   148  
   149  		txClient := &fakeTxClient{
   150  			openInjectPayload: json.RawMessage(txJSON),
   151  		}
   152  
   153  		mgr, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{
   154  			ObjectSchema: &models.Schema{
   155  				Classes: []*models.Class{
   156  					{
   157  						Class:           "Hola",
   158  						VectorIndexType: "hnsw",
   159  					},
   160  				},
   161  			},
   162  		})
   163  		assert.Len(t, mgr.ObjectSchema.Classes, 2)
   164  		require.Nil(t, err, "expected nil err, got: %v", err)
   165  	})
   166  
   167  	t.Run("conflict, but sync skipped -> no error", func(t *testing.T) {
   168  		clusterState := &fakeClusterState{
   169  			hosts:       []string{"node1", "node2"},
   170  			syncIgnored: true,
   171  			skipRepair:  true,
   172  		}
   173  
   174  		txJSON, _ := json.Marshal(ReadSchemaPayload{
   175  			Schema: &State{
   176  				ObjectSchema: &models.Schema{
   177  					Classes: []*models.Class{
   178  						{
   179  							Class:           "Bongourno",
   180  							VectorIndexType: "hnsw",
   181  						},
   182  					},
   183  				},
   184  			},
   185  		})
   186  
   187  		txClient := &fakeTxClient{
   188  			openInjectPayload: json.RawMessage(txJSON),
   189  		}
   190  
   191  		m, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{
   192  			ObjectSchema: &models.Schema{
   193  				Classes: []*models.Class{
   194  					{
   195  						Class:           "Hola",
   196  						VectorIndexType: "hnsw",
   197  					},
   198  				},
   199  			},
   200  		})
   201  		require.Nil(t, err)
   202  
   203  		st, _ := m.ClusterStatus(context.Background())
   204  		assert.True(t, st.IgnoreSchemaSync, "sync is indicated as skipped")
   205  		assert.False(t, st.Healthy, "cluster is not deemed healthy")
   206  		assert.True(t, len(st.Error) > 0, "the error is shown")
   207  	})
   208  
   209  	t.Run("new node joining, agreement between all", func(t *testing.T) {
   210  		clusterState := &fakeClusterState{
   211  			hosts: []string{"node1", "node2"},
   212  		}
   213  
   214  		txJSON, _ := json.Marshal(ReadSchemaPayload{
   215  			Schema: &State{
   216  				ShardingState: map[string]*sharding.State{
   217  					"GutenTag": {},
   218  				},
   219  				ObjectSchema: &models.Schema{
   220  					Classes: []*models.Class{
   221  						{
   222  							Class:           "GutenTag",
   223  							VectorIndexType: "hnsw",
   224  						},
   225  					},
   226  				},
   227  			},
   228  		})
   229  
   230  		txClient := &fakeTxClient{
   231  			openInjectPayload: json.RawMessage(txJSON),
   232  		}
   233  
   234  		sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{
   235  			ShardingState: map[string]*sharding.State{
   236  				"GutenTag": {},
   237  			},
   238  			ObjectSchema: &models.Schema{
   239  				Classes: []*models.Class{
   240  					{
   241  						Class:           "GutenTag",
   242  						VectorIndexType: "hnsw",
   243  					},
   244  				},
   245  			},
   246  		})
   247  		require.Nil(t, err)
   248  
   249  		localSchema := sm.GetSchemaSkipAuth()
   250  		assert.Equal(t, "GutenTag", localSchema.FindClassByName("GutenTag").Class)
   251  	})
   252  
   253  	t.Run("new node joining, other nodes include an outdated version", func(t *testing.T) {
   254  		clusterState := &fakeClusterState{
   255  			hosts: []string{"node1", "node2"},
   256  		}
   257  
   258  		txClient := &fakeTxClient{
   259  			openErr: fmt.Errorf("unrecognized schema transaction type"),
   260  		}
   261  
   262  		sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, nil)
   263  		require.Nil(t, err) // no error, sync was skipped
   264  
   265  		schema := sm.GetSchemaSkipAuth()
   266  		assert.Len(t, schema.Objects.Classes, 0, "schema is still empty")
   267  	})
   268  
   269  	t.Run("node with data (re-)joining, but other nodes are too old", func(t *testing.T) {
   270  		// we expect that sync would be skipped because the other nodes can't take
   271  		// part in the sync
   272  		clusterState := &fakeClusterState{
   273  			hosts: []string{"node1", "node2"},
   274  		}
   275  
   276  		txClient := &fakeTxClient{
   277  			openErr: fmt.Errorf("unrecognized schema transaction type"),
   278  		}
   279  
   280  		sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{
   281  			ObjectSchema: &models.Schema{
   282  				Classes: []*models.Class{
   283  					{
   284  						Class:           "Hola",
   285  						VectorIndexType: "hnsw",
   286  					},
   287  				},
   288  			},
   289  		})
   290  		require.Nil(t, err) // startup sync was skipped, no error
   291  		schema := sm.GetSchemaSkipAuth()
   292  		require.Len(t, schema.Objects.Classes, 1, "schema is still the local schema")
   293  		assert.Equal(t, "Hola", schema.Objects.Classes[0].Class)
   294  	})
   295  
   296  	t.Run("new node joining, schema identical, but other nodes have already been migrated", func(t *testing.T) {
   297  		// Migration refers to the change that happens when a node first starts
   298  		// up with v1.17. It reads the `belongsToNode` from the sharding config and
   299  		// writes the content into the new `belongsToNodes[]` array type.
   300  		//
   301  		// The timing of the migration vs the sync matters: The remote notes have
   302  		// already completed startup, therefore they have been migrated. If the
   303  		// local schema is not migrated yet, it could fail the checks even though
   304  		// it is logically identical.
   305  		clusterState := &fakeClusterState{
   306  			hosts: []string{"node1", "node2"},
   307  		}
   308  
   309  		txJSON, _ := json.Marshal(ReadSchemaPayload{
   310  			Schema: &State{
   311  				ShardingState: map[string]*sharding.State{
   312  					"GutenTag": {
   313  						IndexID: "GutenTag",
   314  						Physical: map[string]sharding.Physical{
   315  							"a-shard-of-beauty": {
   316  								Name:           "a-shard-of-beauty",
   317  								BelongsToNodes: []string{"node-0"}, // Note the usage of the new field (!)
   318  							},
   319  						},
   320  					},
   321  				},
   322  				ObjectSchema: &models.Schema{
   323  					Classes: []*models.Class{
   324  						{
   325  							Class:           "GutenTag",
   326  							VectorIndexType: "hnsw",
   327  						},
   328  					},
   329  				},
   330  			},
   331  		})
   332  
   333  		txClient := &fakeTxClient{
   334  			openInjectPayload: json.RawMessage(txJSON),
   335  		}
   336  
   337  		sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{
   338  			ShardingState: map[string]*sharding.State{
   339  				"GutenTag": {
   340  					IndexID: "GutenTag",
   341  					Physical: map[string]sharding.Physical{
   342  						"a-shard-of-beauty": {
   343  							Name:                                 "a-shard-of-beauty",
   344  							LegacyBelongsToNodeForBackwardCompat: "node-0", // Note the usage of the old field (!)
   345  						},
   346  					},
   347  				},
   348  			},
   349  			ObjectSchema: &models.Schema{
   350  				Classes: []*models.Class{
   351  					{
   352  						Class:           "GutenTag",
   353  						VectorIndexType: "hnsw",
   354  					},
   355  				},
   356  			},
   357  		})
   358  		require.Nil(t, err)
   359  
   360  		localSchema := sm.GetSchemaSkipAuth()
   361  		assert.Equal(t, "GutenTag", localSchema.FindClassByName("GutenTag").Class)
   362  	})
   363  }
   364  
   365  func TestStartupSyncUnhappyPaths(t *testing.T) {
   366  	type test struct {
   367  		name          string
   368  		nodes         []string
   369  		errContains   string
   370  		txPayload     interface{}
   371  		txOpenErr     error
   372  		initialSchema *State
   373  	}
   374  
   375  	tests := []test{
   376  		{
   377  			name:        "corrupt cluster state: no nodes",
   378  			nodes:       []string{},
   379  			errContains: "cluster has size=0",
   380  		},
   381  		{
   382  			name:        "corrupt cluster state: name mismatch",
   383  			nodes:       []string{"the-wrong-one"},
   384  			errContains: "only node in the cluster does not match local",
   385  		},
   386  		{
   387  			name:        "open tx fails on empty node",
   388  			nodes:       []string{"node1", "node2"},
   389  			txOpenErr:   cluster.ErrConcurrentTransaction,
   390  			errContains: "concurrent transaction",
   391  		},
   392  		{
   393  			name: "open tx fails on populated node",
   394  			initialSchema: &State{ObjectSchema: &models.Schema{
   395  				Classes: []*models.Class{{Class: "Foo", VectorIndexType: "hnsw"}},
   396  			}},
   397  			nodes:       []string{"node1", "node2"},
   398  			txOpenErr:   cluster.ErrConcurrentTransaction,
   399  			errContains: "concurrent transaction",
   400  		},
   401  		{
   402  			name:        "wrong tx payload",
   403  			nodes:       []string{"node1", "node2"},
   404  			txPayload:   "foo",
   405  			errContains: "unmarshal tx",
   406  		},
   407  	}
   408  
   409  	for _, test := range tests {
   410  		t.Run(test.name, func(t *testing.T) {
   411  			clusterState := &fakeClusterState{
   412  				hosts: test.nodes,
   413  			}
   414  
   415  			if test.txPayload == nil {
   416  				test.txPayload = ReadSchemaPayload{
   417  					Schema: &State{
   418  						ObjectSchema: &models.Schema{
   419  							Classes: []*models.Class{
   420  								{
   421  									Class:           "Bongourno",
   422  									VectorIndexType: "hnsw",
   423  								},
   424  							},
   425  						},
   426  					},
   427  				}
   428  			}
   429  
   430  			txJSON, _ := json.Marshal(test.txPayload)
   431  
   432  			txClient := &fakeTxClient{
   433  				openInjectPayload: json.RawMessage(txJSON),
   434  				openErr:           test.txOpenErr,
   435  			}
   436  
   437  			_, err := newManagerWithClusterAndTx(t, clusterState, txClient, test.initialSchema)
   438  			require.NotNil(t, err)
   439  			assert.Contains(t, err.Error(), test.errContains)
   440  		})
   441  	}
   442  }
   443  
   444  func newManagerWithClusterAndTx(t *testing.T, clusterState clusterState,
   445  	txClient cluster.Client, initialSchema *State,
   446  ) (*Manager, error) {
   447  	logger, _ := testlog.NewNullLogger()
   448  	repo := newFakeRepo()
   449  	if initialSchema == nil {
   450  		initState := NewState(1)
   451  		initialSchema = &initState
   452  	}
   453  	repo.schema = *initialSchema
   454  	sm, err := NewManager(&NilMigrator{}, repo, logger, &fakeAuthorizer{},
   455  		config.Config{
   456  			DefaultVectorizerModule: config.VectorizerModuleNone,
   457  			Replication:             replication.GlobalConfig{MinimumFactor: 1},
   458  		},
   459  		dummyParseVectorConfig, // only option for now
   460  		&fakeVectorizerValidator{}, dummyValidateInvertedConfig,
   461  		&fakeModuleConfig{}, clusterState, txClient, &fakeTxPersistence{}, &fakeScaleOutManager{},
   462  	)
   463  
   464  	return sm, err
   465  }