github.com/weaviate/weaviate@v1.24.6/usecases/schema/startup_cluster_sync_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package schema 13 14 import ( 15 "context" 16 "encoding/json" 17 "fmt" 18 "testing" 19 20 testlog "github.com/sirupsen/logrus/hooks/test" 21 "github.com/stretchr/testify/assert" 22 "github.com/stretchr/testify/require" 23 "github.com/weaviate/weaviate/entities/models" 24 "github.com/weaviate/weaviate/entities/replication" 25 "github.com/weaviate/weaviate/usecases/cluster" 26 "github.com/weaviate/weaviate/usecases/config" 27 "github.com/weaviate/weaviate/usecases/sharding" 28 ) 29 30 func TestStartupSync(t *testing.T) { 31 t.Run("new node joining, other nodes have schema", func(t *testing.T) { 32 clusterState := &fakeClusterState{ 33 hosts: []string{"node1", "node2"}, 34 } 35 36 txJSON, _ := json.Marshal(ReadSchemaPayload{ 37 Schema: &State{ 38 ObjectSchema: &models.Schema{ 39 Classes: []*models.Class{ 40 { 41 Class: "Bongourno", 42 VectorIndexType: "hnsw", 43 }, 44 }, 45 }, 46 }, 47 }) 48 49 txClient := &fakeTxClient{ 50 openInjectPayload: json.RawMessage(txJSON), 51 } 52 53 sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, nil) 54 require.Nil(t, err) 55 56 localSchema := sm.GetSchemaSkipAuth() 57 assert.Equal(t, "Bongourno", localSchema.FindClassByName("Bongourno").Class) 58 59 st, _ := sm.ClusterStatus(context.Background()) 60 assert.False(t, st.IgnoreSchemaSync, "sync is indicated as not skipped") 61 assert.True(t, st.Healthy, "cluster is deemed healthy") 62 assert.Len(t, st.Error, 0, "no error is shown") 63 }) 64 65 t.Run("new node joining, other nodes have no schema", func(t *testing.T) { 66 clusterState := &fakeClusterState{ 67 hosts: []string{"node1", "node2"}, 68 } 69 70 txJSON, _ := json.Marshal(ReadSchemaPayload{ 71 Schema: &State{ 72 ObjectSchema: &models.Schema{ 73 Classes: []*models.Class{}, 74 }, 75 }, 76 }) 77 78 txClient := &fakeTxClient{ 79 openInjectPayload: json.RawMessage(txJSON), 80 } 81 82 sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, nil) 83 require.Nil(t, err) 84 85 localSchema := sm.GetSchemaSkipAuth() 86 assert.Len(t, localSchema.Objects.Classes, 0) 87 88 st, _ := sm.ClusterStatus(context.Background()) 89 assert.False(t, st.IgnoreSchemaSync, "sync is indicated as not skipped") 90 assert.True(t, st.Healthy, "cluster is deemed healthy") 91 assert.Len(t, st.Error, 0, "no error is shown") 92 }) 93 94 t.Run("new node joining, conflict in schema between nodes", func(t *testing.T) { 95 clusterState := &fakeClusterState{ 96 hosts: []string{"node1", "node2"}, 97 skipRepair: true, 98 } 99 100 txJSON, _ := json.Marshal(ReadSchemaPayload{ 101 Schema: &State{ 102 ObjectSchema: &models.Schema{ 103 Classes: []*models.Class{ 104 { 105 Class: "Bongourno", 106 VectorIndexType: "hnsw", 107 }, 108 }, 109 }, 110 }, 111 }) 112 113 txClient := &fakeTxClient{ 114 openInjectPayload: json.RawMessage(txJSON), 115 } 116 117 _, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{ 118 ObjectSchema: &models.Schema{ 119 Classes: []*models.Class{ 120 { 121 Class: "Hola", 122 VectorIndexType: "hnsw", 123 }, 124 }, 125 }, 126 }) 127 require.NotNil(t, err) 128 assert.Contains(t, err.Error(), "corrupt") 129 }) 130 131 t.Run("conflict, but schema repaired", func(t *testing.T) { 132 clusterState := &fakeClusterState{ 133 hosts: []string{"node1", "node2"}, 134 } 135 136 txJSON, _ := json.Marshal(ReadSchemaPayload{ 137 Schema: &State{ 138 ObjectSchema: &models.Schema{ 139 Classes: []*models.Class{ 140 { 141 Class: "Bongourno", 142 VectorIndexType: "hnsw", 143 }, 144 }, 145 }, 146 }, 147 }) 148 149 txClient := &fakeTxClient{ 150 openInjectPayload: json.RawMessage(txJSON), 151 } 152 153 mgr, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{ 154 ObjectSchema: &models.Schema{ 155 Classes: []*models.Class{ 156 { 157 Class: "Hola", 158 VectorIndexType: "hnsw", 159 }, 160 }, 161 }, 162 }) 163 assert.Len(t, mgr.ObjectSchema.Classes, 2) 164 require.Nil(t, err, "expected nil err, got: %v", err) 165 }) 166 167 t.Run("conflict, but sync skipped -> no error", func(t *testing.T) { 168 clusterState := &fakeClusterState{ 169 hosts: []string{"node1", "node2"}, 170 syncIgnored: true, 171 skipRepair: true, 172 } 173 174 txJSON, _ := json.Marshal(ReadSchemaPayload{ 175 Schema: &State{ 176 ObjectSchema: &models.Schema{ 177 Classes: []*models.Class{ 178 { 179 Class: "Bongourno", 180 VectorIndexType: "hnsw", 181 }, 182 }, 183 }, 184 }, 185 }) 186 187 txClient := &fakeTxClient{ 188 openInjectPayload: json.RawMessage(txJSON), 189 } 190 191 m, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{ 192 ObjectSchema: &models.Schema{ 193 Classes: []*models.Class{ 194 { 195 Class: "Hola", 196 VectorIndexType: "hnsw", 197 }, 198 }, 199 }, 200 }) 201 require.Nil(t, err) 202 203 st, _ := m.ClusterStatus(context.Background()) 204 assert.True(t, st.IgnoreSchemaSync, "sync is indicated as skipped") 205 assert.False(t, st.Healthy, "cluster is not deemed healthy") 206 assert.True(t, len(st.Error) > 0, "the error is shown") 207 }) 208 209 t.Run("new node joining, agreement between all", func(t *testing.T) { 210 clusterState := &fakeClusterState{ 211 hosts: []string{"node1", "node2"}, 212 } 213 214 txJSON, _ := json.Marshal(ReadSchemaPayload{ 215 Schema: &State{ 216 ShardingState: map[string]*sharding.State{ 217 "GutenTag": {}, 218 }, 219 ObjectSchema: &models.Schema{ 220 Classes: []*models.Class{ 221 { 222 Class: "GutenTag", 223 VectorIndexType: "hnsw", 224 }, 225 }, 226 }, 227 }, 228 }) 229 230 txClient := &fakeTxClient{ 231 openInjectPayload: json.RawMessage(txJSON), 232 } 233 234 sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{ 235 ShardingState: map[string]*sharding.State{ 236 "GutenTag": {}, 237 }, 238 ObjectSchema: &models.Schema{ 239 Classes: []*models.Class{ 240 { 241 Class: "GutenTag", 242 VectorIndexType: "hnsw", 243 }, 244 }, 245 }, 246 }) 247 require.Nil(t, err) 248 249 localSchema := sm.GetSchemaSkipAuth() 250 assert.Equal(t, "GutenTag", localSchema.FindClassByName("GutenTag").Class) 251 }) 252 253 t.Run("new node joining, other nodes include an outdated version", func(t *testing.T) { 254 clusterState := &fakeClusterState{ 255 hosts: []string{"node1", "node2"}, 256 } 257 258 txClient := &fakeTxClient{ 259 openErr: fmt.Errorf("unrecognized schema transaction type"), 260 } 261 262 sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, nil) 263 require.Nil(t, err) // no error, sync was skipped 264 265 schema := sm.GetSchemaSkipAuth() 266 assert.Len(t, schema.Objects.Classes, 0, "schema is still empty") 267 }) 268 269 t.Run("node with data (re-)joining, but other nodes are too old", func(t *testing.T) { 270 // we expect that sync would be skipped because the other nodes can't take 271 // part in the sync 272 clusterState := &fakeClusterState{ 273 hosts: []string{"node1", "node2"}, 274 } 275 276 txClient := &fakeTxClient{ 277 openErr: fmt.Errorf("unrecognized schema transaction type"), 278 } 279 280 sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{ 281 ObjectSchema: &models.Schema{ 282 Classes: []*models.Class{ 283 { 284 Class: "Hola", 285 VectorIndexType: "hnsw", 286 }, 287 }, 288 }, 289 }) 290 require.Nil(t, err) // startup sync was skipped, no error 291 schema := sm.GetSchemaSkipAuth() 292 require.Len(t, schema.Objects.Classes, 1, "schema is still the local schema") 293 assert.Equal(t, "Hola", schema.Objects.Classes[0].Class) 294 }) 295 296 t.Run("new node joining, schema identical, but other nodes have already been migrated", func(t *testing.T) { 297 // Migration refers to the change that happens when a node first starts 298 // up with v1.17. It reads the `belongsToNode` from the sharding config and 299 // writes the content into the new `belongsToNodes[]` array type. 300 // 301 // The timing of the migration vs the sync matters: The remote notes have 302 // already completed startup, therefore they have been migrated. If the 303 // local schema is not migrated yet, it could fail the checks even though 304 // it is logically identical. 305 clusterState := &fakeClusterState{ 306 hosts: []string{"node1", "node2"}, 307 } 308 309 txJSON, _ := json.Marshal(ReadSchemaPayload{ 310 Schema: &State{ 311 ShardingState: map[string]*sharding.State{ 312 "GutenTag": { 313 IndexID: "GutenTag", 314 Physical: map[string]sharding.Physical{ 315 "a-shard-of-beauty": { 316 Name: "a-shard-of-beauty", 317 BelongsToNodes: []string{"node-0"}, // Note the usage of the new field (!) 318 }, 319 }, 320 }, 321 }, 322 ObjectSchema: &models.Schema{ 323 Classes: []*models.Class{ 324 { 325 Class: "GutenTag", 326 VectorIndexType: "hnsw", 327 }, 328 }, 329 }, 330 }, 331 }) 332 333 txClient := &fakeTxClient{ 334 openInjectPayload: json.RawMessage(txJSON), 335 } 336 337 sm, err := newManagerWithClusterAndTx(t, clusterState, txClient, &State{ 338 ShardingState: map[string]*sharding.State{ 339 "GutenTag": { 340 IndexID: "GutenTag", 341 Physical: map[string]sharding.Physical{ 342 "a-shard-of-beauty": { 343 Name: "a-shard-of-beauty", 344 LegacyBelongsToNodeForBackwardCompat: "node-0", // Note the usage of the old field (!) 345 }, 346 }, 347 }, 348 }, 349 ObjectSchema: &models.Schema{ 350 Classes: []*models.Class{ 351 { 352 Class: "GutenTag", 353 VectorIndexType: "hnsw", 354 }, 355 }, 356 }, 357 }) 358 require.Nil(t, err) 359 360 localSchema := sm.GetSchemaSkipAuth() 361 assert.Equal(t, "GutenTag", localSchema.FindClassByName("GutenTag").Class) 362 }) 363 } 364 365 func TestStartupSyncUnhappyPaths(t *testing.T) { 366 type test struct { 367 name string 368 nodes []string 369 errContains string 370 txPayload interface{} 371 txOpenErr error 372 initialSchema *State 373 } 374 375 tests := []test{ 376 { 377 name: "corrupt cluster state: no nodes", 378 nodes: []string{}, 379 errContains: "cluster has size=0", 380 }, 381 { 382 name: "corrupt cluster state: name mismatch", 383 nodes: []string{"the-wrong-one"}, 384 errContains: "only node in the cluster does not match local", 385 }, 386 { 387 name: "open tx fails on empty node", 388 nodes: []string{"node1", "node2"}, 389 txOpenErr: cluster.ErrConcurrentTransaction, 390 errContains: "concurrent transaction", 391 }, 392 { 393 name: "open tx fails on populated node", 394 initialSchema: &State{ObjectSchema: &models.Schema{ 395 Classes: []*models.Class{{Class: "Foo", VectorIndexType: "hnsw"}}, 396 }}, 397 nodes: []string{"node1", "node2"}, 398 txOpenErr: cluster.ErrConcurrentTransaction, 399 errContains: "concurrent transaction", 400 }, 401 { 402 name: "wrong tx payload", 403 nodes: []string{"node1", "node2"}, 404 txPayload: "foo", 405 errContains: "unmarshal tx", 406 }, 407 } 408 409 for _, test := range tests { 410 t.Run(test.name, func(t *testing.T) { 411 clusterState := &fakeClusterState{ 412 hosts: test.nodes, 413 } 414 415 if test.txPayload == nil { 416 test.txPayload = ReadSchemaPayload{ 417 Schema: &State{ 418 ObjectSchema: &models.Schema{ 419 Classes: []*models.Class{ 420 { 421 Class: "Bongourno", 422 VectorIndexType: "hnsw", 423 }, 424 }, 425 }, 426 }, 427 } 428 } 429 430 txJSON, _ := json.Marshal(test.txPayload) 431 432 txClient := &fakeTxClient{ 433 openInjectPayload: json.RawMessage(txJSON), 434 openErr: test.txOpenErr, 435 } 436 437 _, err := newManagerWithClusterAndTx(t, clusterState, txClient, test.initialSchema) 438 require.NotNil(t, err) 439 assert.Contains(t, err.Error(), test.errContains) 440 }) 441 } 442 } 443 444 func newManagerWithClusterAndTx(t *testing.T, clusterState clusterState, 445 txClient cluster.Client, initialSchema *State, 446 ) (*Manager, error) { 447 logger, _ := testlog.NewNullLogger() 448 repo := newFakeRepo() 449 if initialSchema == nil { 450 initState := NewState(1) 451 initialSchema = &initState 452 } 453 repo.schema = *initialSchema 454 sm, err := NewManager(&NilMigrator{}, repo, logger, &fakeAuthorizer{}, 455 config.Config{ 456 DefaultVectorizerModule: config.VectorizerModuleNone, 457 Replication: replication.GlobalConfig{MinimumFactor: 1}, 458 }, 459 dummyParseVectorConfig, // only option for now 460 &fakeVectorizerValidator{}, dummyValidateInvertedConfig, 461 &fakeModuleConfig{}, clusterState, txClient, &fakeTxPersistence{}, &fakeScaleOutManager{}, 462 ) 463 464 return sm, err 465 }