github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/clusterintegrationtest/cluster_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 // +build integrationTest 14 15 package clusterintegrationtest 16 17 import ( 18 "context" 19 "encoding/json" 20 "fmt" 21 "math/rand" 22 "strconv" 23 "strings" 24 "testing" 25 "time" 26 27 "github.com/stretchr/testify/assert" 28 "github.com/stretchr/testify/require" 29 "github.com/weaviate/weaviate/adapters/repos/db" 30 "github.com/weaviate/weaviate/entities/additional" 31 "github.com/weaviate/weaviate/entities/aggregation" 32 "github.com/weaviate/weaviate/entities/dto" 33 "github.com/weaviate/weaviate/entities/filters" 34 "github.com/weaviate/weaviate/entities/models" 35 "github.com/weaviate/weaviate/entities/schema" 36 "github.com/weaviate/weaviate/entities/search" 37 "github.com/weaviate/weaviate/entities/searchparams" 38 "github.com/weaviate/weaviate/usecases/objects" 39 ) 40 41 const ( 42 vectorDims = 20 43 numberOfNodes = 10 44 distributedClass = "Distributed" 45 ) 46 47 // TestDistributedSetup uses as many real components and only mocks out 48 // non-essential parts. Essentially we fix the shard/cluster state and schema 49 // as they aren't critical to this test, but use real repos and real HTTP APIs 50 // between the repos. 51 func TestDistributedSetup(t *testing.T) { 52 t.Run("individual imports", func(t *testing.T) { 53 dirName := setupDirectory(t) 54 r := getRandomSeed() 55 testDistributed(t, dirName, r, false) 56 }) 57 t.Run("batched imports", func(t *testing.T) { 58 dirName := setupDirectory(t) 59 r := getRandomSeed() 60 testDistributed(t, dirName, r, true) 61 }) 62 } 63 64 func testDistributed(t *testing.T, dirName string, rnd *rand.Rand, batch bool) { 65 var nodes []*node 66 numberOfObjects := 200 67 68 t.Run("setup", func(t *testing.T) { 69 overallShardState := multiShardState(numberOfNodes) 70 shardStateSerialized, err := json.Marshal(overallShardState) 71 require.Nil(t, err) 72 73 for i := 0; i < numberOfNodes; i++ { 74 node := &node{ 75 name: fmt.Sprintf("node-%d", i), 76 } 77 78 node.init(dirName, shardStateSerialized, &nodes) 79 nodes = append(nodes, node) 80 } 81 }) 82 83 t.Run("apply schema", func(t *testing.T) { 84 for i := range nodes { 85 err := nodes[i].migrator.AddClass(context.Background(), class(), 86 nodes[i].schemaManager.shardState) 87 require.Nil(t, err) 88 err = nodes[i].migrator.AddClass(context.Background(), secondClassWithRef(), 89 nodes[i].schemaManager.shardState) 90 require.Nil(t, err) 91 nodes[i].schemaManager.schema.Objects.Classes = append(nodes[i].schemaManager.schema.Objects.Classes, 92 class(), secondClassWithRef()) 93 } 94 }) 95 96 data := exampleData(numberOfObjects) 97 refData := exampleDataWithRefs(numberOfObjects, 5, data) 98 99 if batch { 100 t.Run("import large batch from random node", func(t *testing.T) { 101 // pick a random node, but send the entire batch to this node 102 node := nodes[rnd.Intn(len(nodes))] 103 104 batchObjs := dataAsBatch(data) 105 res, err := node.repo.BatchPutObjects(context.Background(), batchObjs, nil) 106 require.Nil(t, err) 107 for _, ind := range res { 108 require.Nil(t, ind.Err) 109 } 110 }) 111 112 t.Run("import second class without refs", func(t *testing.T) { 113 // pick a random node, but send the entire batch to this node 114 node := nodes[rnd.Intn(len(nodes))] 115 116 batchObjs := dataAsBatchWithProps(refData, []string{"description"}) 117 res, err := node.repo.BatchPutObjects(context.Background(), batchObjs, nil) 118 require.Nil(t, err) 119 for _, ind := range res { 120 require.Nil(t, ind.Err) 121 } 122 }) 123 124 t.Run("import refs as batch", func(t *testing.T) { 125 // pick a random node, but send the entire batch to this node 126 node := nodes[rnd.Intn(len(nodes))] 127 128 batch := refsAsBatch(refData, "toFirst") 129 res, err := node.repo.AddBatchReferences(context.Background(), batch, nil) 130 require.Nil(t, err) 131 for _, ind := range res { 132 require.Nil(t, ind.Err) 133 } 134 }) 135 } else { 136 t.Run("import first class by picking a random node", func(t *testing.T) { 137 for _, obj := range data { 138 node := nodes[rnd.Intn(len(nodes))] 139 140 err := node.repo.PutObject(context.Background(), obj, obj.Vector, nil, nil) 141 require.Nil(t, err) 142 } 143 }) 144 t.Run("import second class with refs by picking a random node", func(t *testing.T) { 145 for _, obj := range refData { 146 node := nodes[rnd.Intn(len(nodes))] 147 148 err := node.repo.PutObject(context.Background(), obj, obj.Vector, nil, nil) 149 require.Nil(t, err) 150 151 } 152 }) 153 } 154 t.Run("query individually to check if all exist using random nodes", func(t *testing.T) { 155 for _, obj := range data { 156 node := nodes[rnd.Intn(len(nodes))] 157 158 ok, err := node.repo.Exists(context.Background(), distributedClass, obj.ID, nil, "") 159 require.Nil(t, err) 160 assert.True(t, ok) 161 } 162 }) 163 164 t.Run("query individually using random node", func(t *testing.T) { 165 for _, obj := range data { 166 node := nodes[rnd.Intn(len(nodes))] 167 168 res, err := node.repo.ObjectByID(context.Background(), obj.ID, search.SelectProperties{}, additional.Properties{}, "") 169 require.Nil(t, err) 170 require.NotNil(t, res) 171 172 // only compare string prop to avoid having to deal with parsing time 173 // props 174 assert.Equal(t, obj.Properties.(map[string]interface{})["description"], 175 res.Object().Properties.(map[string]interface{})["description"]) 176 } 177 }) 178 179 t.Run("perform vector searches", func(t *testing.T) { 180 // note this test assumes a recall of 100% which only works with HNSW on 181 // small sizes, so if we use this test suite with massive sizes, we should 182 // not expect this test to succeed 100% of times anymore. 183 runs := 10 184 185 for i := 0; i < runs; i++ { 186 query := make([]float32, vectorDims) 187 for i := range query { 188 query[i] = rnd.Float32() 189 } 190 191 groundTruth := bruteForceObjectsByQuery(data, query) 192 193 node := nodes[rnd.Intn(len(nodes))] 194 res, err := node.repo.VectorSearch(context.Background(), dto.GetParams{ 195 SearchVector: query, 196 Pagination: &filters.Pagination{ 197 Limit: 25, 198 }, 199 ClassName: distributedClass, 200 }) 201 assert.Nil(t, err) 202 for i, obj := range res { 203 assert.Equal(t, groundTruth[i].ID, obj.ID, fmt.Sprintf("at pos %d", i)) 204 } 205 } 206 207 for _, obj := range data { 208 node := nodes[rnd.Intn(len(nodes))] 209 210 res, err := node.repo.ObjectByID(context.Background(), obj.ID, search.SelectProperties{}, additional.Properties{}, "") 211 require.Nil(t, err) 212 require.NotNil(t, res) 213 214 // only compare string prop to avoid having to deal with parsing time 215 // props 216 assert.Equal(t, obj.Properties.(map[string]interface{})["description"], 217 res.Object().Properties.(map[string]interface{})["description"]) 218 } 219 }) 220 221 t.Run("query individually and resolve references", func(t *testing.T) { 222 for _, obj := range refData { 223 // if i == 5 { 224 // break 225 // } 226 node := nodes[rnd.Intn(len(nodes))] 227 228 res, err := node.repo.ObjectByID(context.Background(), obj.ID, search.SelectProperties{ 229 search.SelectProperty{ 230 Name: "toFirst", 231 IsPrimitive: false, 232 Refs: []search.SelectClass{ 233 { 234 ClassName: distributedClass, 235 RefProperties: search.SelectProperties{ 236 search.SelectProperty{ 237 Name: "description", 238 IsPrimitive: true, 239 }, 240 }, 241 }, 242 }, 243 }, 244 }, additional.Properties{}, "") 245 require.Nil(t, err) 246 require.NotNil(t, res) 247 props := res.Object().Properties.(map[string]interface{}) 248 refProp, ok := props["toFirst"].([]interface{}) 249 require.True(t, ok) 250 251 var refPayload []map[string]interface{} 252 for _, res := range refProp { 253 parsed, ok := res.(search.LocalRef) 254 require.True(t, ok) 255 refPayload = append(refPayload, map[string]interface{}{ 256 "description": parsed.Fields["description"], 257 }) 258 } 259 260 actual := manuallyResolveRef(t, obj, data, "toFirst", "description", nil) 261 assert.Equal(t, actual, refPayload) 262 } 263 }) 264 265 t.Run("query individually with cross-ref vectors and resolve references", func(t *testing.T) { 266 for _, obj := range refData { 267 // if i == 1 { 268 // break 269 // } 270 node := nodes[rnd.Intn(len(nodes))] 271 272 res, err := node.repo.Object(context.Background(), obj.Class, obj.ID, search.SelectProperties{ 273 search.SelectProperty{ 274 Name: "toFirst", 275 IsPrimitive: false, 276 Refs: []search.SelectClass{ 277 { 278 ClassName: distributedClass, 279 RefProperties: search.SelectProperties{ 280 search.SelectProperty{ 281 Name: "description", 282 IsPrimitive: true, 283 }, 284 }, 285 AdditionalProperties: additional.Properties{ 286 Vector: true, 287 }, 288 }, 289 }, 290 }, 291 }, additional.Properties{}, nil, "") 292 require.Nil(t, err) 293 require.NotNil(t, res) 294 props := res.Object().Properties.(map[string]interface{}) 295 refProp, ok := props["toFirst"].([]interface{}) 296 require.True(t, ok) 297 298 var refPayload []map[string]interface{} 299 var refVector []map[string]interface{} 300 for _, ref := range refProp { 301 parsed, ok := ref.(search.LocalRef) 302 require.True(t, ok) 303 refPayload = append(refPayload, map[string]interface{}{ 304 "description": parsed.Fields["description"], 305 }) 306 vector, ok := parsed.Fields["vector"].([]float32) 307 require.True(t, ok) 308 require.NotEmpty(t, vector) 309 refVector = append(refVector, map[string]interface{}{ 310 "vector": vector, 311 }) 312 } 313 314 actual := manuallyResolveRef(t, obj, data, "toFirst", "description", nil) 315 assert.Equal(t, actual, refPayload) 316 actual = manuallyResolveRef(t, obj, data, "toFirst", "vector", node.repo) 317 assert.Equal(t, actual, refVector) 318 } 319 }) 320 321 t.Run("ranked keyword search", func(t *testing.T) { 322 for i := 0; i < numberOfObjects; i++ { 323 description := fmt.Sprintf("object %d", i) 324 keywordRanking := &searchparams.KeywordRanking{ 325 Query: description, 326 Properties: []string{"description"}, 327 } 328 329 params := dto.GetParams{ 330 ClassName: distributedClass, 331 KeywordRanking: keywordRanking, 332 Pagination: &filters.Pagination{Limit: 100}, 333 } 334 335 node := nodes[rnd.Intn(len(nodes))] 336 res, err := node.repo.Search(context.Background(), params) 337 require.Nil(t, err) 338 require.NotEmpty(t, res) 339 340 expected := strings.Join(strings.Split(description, " "), "-") 341 received := res[0].Object().Properties.(map[string]interface{})["description"] 342 assert.Equal(t, expected, received) 343 } 344 }) 345 346 t.Run("aggregate count", func(t *testing.T) { 347 params := aggregation.Params{ 348 ClassName: schema.ClassName(distributedClass), 349 IncludeMetaCount: true, 350 } 351 352 node := nodes[rnd.Intn(len(nodes))] 353 res, err := node.repo.Aggregate(context.Background(), params) 354 require.Nil(t, err) 355 356 expectedResult := &aggregation.Result{ 357 Groups: []aggregation.Group{ 358 { 359 Count: numberOfObjects, 360 }, 361 }, 362 } 363 364 assert.Equal(t, expectedResult, res) 365 }) 366 367 t.Run("modify an object using patch", func(t *testing.T) { 368 obj := data[0] 369 370 node := nodes[rnd.Intn(len(nodes))] 371 err := node.repo.Merge(context.Background(), objects.MergeDocument{ 372 Class: distributedClass, 373 ID: obj.ID, 374 PrimitiveSchema: map[string]interface{}{ 375 "other_property": "a-value-inserted-through-merge", 376 }, 377 }, nil, "") 378 379 require.Nil(t, err) 380 }) 381 382 t.Run("verify the patched object contains the additions and orig", func(t *testing.T) { 383 obj := data[0] 384 385 node := nodes[rnd.Intn(len(nodes))] 386 res, err := node.repo.ObjectByID(context.Background(), obj.ID, search.SelectProperties{}, additional.Properties{}, "") 387 388 require.Nil(t, err) 389 previousMap := obj.Properties.(map[string]interface{}) 390 assert.Equal(t, "a-value-inserted-through-merge", res.Object().Properties.(map[string]interface{})["other_property"]) 391 assert.Equal(t, previousMap["description"], res.Object().Properties.(map[string]interface{})["description"]) 392 }) 393 394 // This test prevents a regression on 395 // https://github.com/weaviate/weaviate/issues/1775 396 t.Run("query items by date filter with regular field", func(t *testing.T) { 397 count := len(data) / 2 // try to match half the data objects present 398 cutoff := time.Unix(0, 0).Add(time.Duration(count) * time.Hour) 399 node := nodes[rnd.Intn(len(nodes))] 400 res, err := node.repo.Search(context.Background(), dto.GetParams{ 401 Filters: &filters.LocalFilter{ 402 Root: &filters.Clause{ 403 Operator: filters.OperatorLessThan, 404 On: &filters.Path{ 405 Class: distributedClass, 406 Property: schema.PropertyName("date_property"), 407 }, 408 Value: &filters.Value{ 409 Value: cutoff, 410 Type: schema.DataTypeDate, 411 }, 412 }, 413 }, 414 ClassName: distributedClass, 415 Pagination: &filters.Pagination{ 416 Limit: len(data), 417 }, 418 }) 419 420 require.Nil(t, err) 421 assert.Equal(t, count, len(res)) 422 }) 423 424 // This test prevents a regression on 425 // https://github.com/weaviate/weaviate/issues/1775 426 t.Run("query items by date filter with array field", func(t *testing.T) { 427 count := len(data) / 2 // try to match half the data objects present 428 cutoff := time.Unix(0, 0).Add(time.Duration(count) * time.Hour) 429 node := nodes[rnd.Intn(len(nodes))] 430 res, err := node.repo.Search(context.Background(), dto.GetParams{ 431 Filters: &filters.LocalFilter{ 432 Root: &filters.Clause{ 433 Operator: filters.OperatorLessThan, 434 On: &filters.Path{ 435 Class: distributedClass, 436 Property: schema.PropertyName("date_array_property"), 437 }, 438 Value: &filters.Value{ 439 Value: cutoff, 440 Type: schema.DataTypeDate, 441 }, 442 }, 443 }, 444 ClassName: distributedClass, 445 Pagination: &filters.Pagination{ 446 Limit: len(data), 447 }, 448 }) 449 450 require.Nil(t, err) 451 assert.Equal(t, count, len(res)) 452 }) 453 454 t.Run("sort by", func(t *testing.T) { 455 getPhoneNumber := func(a search.Result) *float64 { 456 prop := a.Object().Properties.(map[string]interface{})["phone_property"] 457 if phoneNumber, ok := prop.(*models.PhoneNumber); ok { 458 phoneStr := fmt.Sprintf("%v%v", phoneNumber.CountryCode, phoneNumber.National) 459 if phone, err := strconv.ParseFloat(phoneStr, 64); err == nil { 460 return &phone 461 } 462 } 463 return nil 464 } 465 getDate := func(a search.Result) *time.Time { 466 asString := a.Object().Properties.(map[string]interface{})["date_property"].(string) 467 if date, err := time.Parse(time.RFC3339, asString); err == nil { 468 return &date 469 } 470 return nil 471 } 472 testData := []struct { 473 name string 474 sort []filters.Sort 475 compareFn func(a, b search.Result) bool 476 }{ 477 { 478 name: "description asc", 479 sort: []filters.Sort{{Path: []string{"description"}, Order: "asc"}}, 480 compareFn: func(a, b search.Result) bool { 481 descriptionA := a.Object().Properties.(map[string]interface{})["description"].(string) 482 descriptionB := b.Object().Properties.(map[string]interface{})["description"].(string) 483 return strings.ToLower(descriptionA) <= strings.ToLower(descriptionB) 484 }, 485 }, 486 { 487 name: "description desc", 488 sort: []filters.Sort{{Path: []string{"description"}, Order: "desc"}}, 489 compareFn: func(a, b search.Result) bool { 490 descriptionA := a.Object().Properties.(map[string]interface{})["description"].(string) 491 descriptionB := b.Object().Properties.(map[string]interface{})["description"].(string) 492 return strings.ToLower(descriptionA) >= strings.ToLower(descriptionB) 493 }, 494 }, 495 { 496 name: "date_property asc", 497 sort: []filters.Sort{{Path: []string{"date_property"}, Order: "asc"}}, 498 compareFn: func(a, b search.Result) bool { 499 datePropA, datePropB := getDate(a), getDate(b) 500 if datePropA != nil && datePropB != nil { 501 return datePropA.Before(*datePropB) 502 } 503 return false 504 }, 505 }, 506 { 507 name: "date_property desc", 508 sort: []filters.Sort{{Path: []string{"date_property"}, Order: "desc"}}, 509 compareFn: func(a, b search.Result) bool { 510 datePropA, datePropB := getDate(a), getDate(b) 511 if datePropA != nil && datePropB != nil { 512 return datePropA.After(*datePropB) 513 } 514 return false 515 }, 516 }, 517 { 518 name: "int_property asc", 519 sort: []filters.Sort{{Path: []string{"int_property"}, Order: "asc"}}, 520 compareFn: func(a, b search.Result) bool { 521 intPropertyA := a.Object().Properties.(map[string]interface{})["int_property"].(float64) 522 intPropertyB := b.Object().Properties.(map[string]interface{})["int_property"].(float64) 523 return intPropertyA <= intPropertyB 524 }, 525 }, 526 { 527 name: "int_property desc", 528 sort: []filters.Sort{{Path: []string{"int_property"}, Order: "desc"}}, 529 compareFn: func(a, b search.Result) bool { 530 intPropertyA := a.Object().Properties.(map[string]interface{})["int_property"].(float64) 531 intPropertyB := b.Object().Properties.(map[string]interface{})["int_property"].(float64) 532 return intPropertyA >= intPropertyB 533 }, 534 }, 535 { 536 name: "phone_property asc", 537 sort: []filters.Sort{{Path: []string{"phone_property"}, Order: "asc"}}, 538 compareFn: func(a, b search.Result) bool { 539 phoneA, phoneB := getPhoneNumber(a), getPhoneNumber(b) 540 if phoneA != nil && phoneB != nil { 541 return *phoneA <= *phoneB 542 } 543 return false 544 }, 545 }, 546 { 547 name: "phone_property desc", 548 sort: []filters.Sort{{Path: []string{"phone_property"}, Order: "desc"}}, 549 compareFn: func(a, b search.Result) bool { 550 phoneA, phoneB := getPhoneNumber(a), getPhoneNumber(b) 551 if phoneA != nil && phoneB != nil { 552 return *phoneA >= *phoneB 553 } 554 return false 555 }, 556 }, 557 } 558 for _, td := range testData { 559 t.Run(td.name, func(t *testing.T) { 560 params := dto.GetParams{ 561 ClassName: distributedClass, 562 Sort: td.sort, 563 Pagination: &filters.Pagination{Limit: 100}, 564 } 565 566 node := nodes[rnd.Intn(len(nodes))] 567 res, err := node.repo.Search(context.Background(), params) 568 require.Nil(t, err) 569 require.NotEmpty(t, res) 570 571 if len(res) > 1 { 572 for i := 1; i < len(res); i++ { 573 assert.True(t, td.compareFn(res[i-1], res[i])) 574 } 575 } 576 }) 577 } 578 }) 579 580 t.Run("node names by shard", func(t *testing.T) { 581 for _, n := range nodes { 582 nodeSet := make(map[string]bool) 583 foundNodes, err := n.repo.Shards(context.Background(), distributedClass) 584 assert.NoError(t, err) 585 for _, found := range foundNodes { 586 nodeSet[found] = true 587 } 588 assert.Len(t, nodeSet, numberOfNodes, "expected %d nodes, got %d", 589 numberOfNodes, len(foundNodes)) 590 } 591 }) 592 593 t.Run("delete a third of the data from random nodes", func(t *testing.T) { 594 for i, obj := range data { 595 if i%3 != 0 { 596 // keep this item 597 continue 598 } 599 600 node := nodes[rnd.Intn(len(nodes))] 601 err := node.repo.DeleteObject(context.Background(), distributedClass, obj.ID, nil, "") 602 require.Nil(t, err) 603 } 604 }) 605 606 t.Run("make sure 2/3 exist, 1/3 no longer exists", func(t *testing.T) { 607 for i, obj := range data { 608 expected := true 609 if i%3 == 0 { 610 expected = false 611 } 612 613 node := nodes[rnd.Intn(len(nodes))] 614 actual, err := node.repo.Exists(context.Background(), distributedClass, obj.ID, nil, "") 615 require.Nil(t, err) 616 assert.Equal(t, expected, actual) 617 } 618 }) 619 620 t.Run("batch delete the remaining 2/3 of data", func(t *testing.T) { 621 getParams := func(className string, dryRun bool) objects.BatchDeleteParams { 622 return objects.BatchDeleteParams{ 623 ClassName: schema.ClassName(className), 624 Filters: &filters.LocalFilter{ 625 Root: &filters.Clause{ 626 Operator: filters.OperatorLike, 627 Value: &filters.Value{ 628 Value: "*", 629 Type: schema.DataTypeText, 630 }, 631 On: &filters.Path{ 632 Property: "id", 633 }, 634 }, 635 }, 636 DryRun: dryRun, 637 Output: "verbose", 638 } 639 } 640 performClassSearch := func(repo *db.DB, className string) ([]search.Result, error) { 641 return repo.Search(context.Background(), dto.GetParams{ 642 ClassName: className, 643 Pagination: &filters.Pagination{Limit: 10000}, 644 }) 645 } 646 node := nodes[rnd.Intn(len(nodes))] 647 // get the initial count of the objects 648 res, err := performClassSearch(node.repo, distributedClass) 649 require.Nil(t, err) 650 beforeDelete := len(res) 651 require.True(t, beforeDelete > 0) 652 // dryRun == false, perform actual delete 653 batchDeleteRes, err := node.repo.BatchDeleteObjects(context.Background(), getParams(distributedClass, false), nil, "") 654 require.Nil(t, err) 655 require.Equal(t, int64(beforeDelete), batchDeleteRes.Matches) 656 require.Equal(t, beforeDelete, len(batchDeleteRes.Objects)) 657 for _, batchRes := range batchDeleteRes.Objects { 658 require.Nil(t, batchRes.Err) 659 } 660 // check that every object is deleted 661 res, err = performClassSearch(node.repo, distributedClass) 662 require.Nil(t, err) 663 require.Equal(t, 0, len(res)) 664 }) 665 666 t.Run("shutdown", func(t *testing.T) { 667 for _, node := range nodes { 668 node.repo.Shutdown(context.Background()) 669 } 670 }) 671 }