github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/multi_shard_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 // +build integrationTest 14 15 package db 16 17 import ( 18 "context" 19 "fmt" 20 "math" 21 "math/rand" 22 "sort" 23 "testing" 24 25 "github.com/go-openapi/strfmt" 26 "github.com/google/uuid" 27 "github.com/sirupsen/logrus" 28 "github.com/sirupsen/logrus/hooks/test" 29 "github.com/stretchr/testify/assert" 30 "github.com/stretchr/testify/require" 31 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer" 32 "github.com/weaviate/weaviate/entities/additional" 33 "github.com/weaviate/weaviate/entities/dto" 34 "github.com/weaviate/weaviate/entities/filters" 35 "github.com/weaviate/weaviate/entities/models" 36 "github.com/weaviate/weaviate/entities/schema" 37 "github.com/weaviate/weaviate/entities/schema/crossref" 38 "github.com/weaviate/weaviate/entities/search" 39 "github.com/weaviate/weaviate/entities/searchparams" 40 enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 41 "github.com/weaviate/weaviate/entities/verbosity" 42 "github.com/weaviate/weaviate/usecases/objects" 43 "github.com/weaviate/weaviate/usecases/sharding" 44 ) 45 46 func Test_MultiShardJourneys_IndividualImports(t *testing.T) { 47 r := getRandomSeed() 48 repo, logger := setupMultiShardTest(t) 49 defer func() { 50 repo.Shutdown(context.Background()) 51 }() 52 53 t.Run("prepare", makeTestMultiShardSchema(repo, logger, false, testClassesForImporting()...)) 54 55 data := multiShardTestData(r) 56 queryVec := exampleQueryVec(r) 57 groundTruth := bruteForceObjectsByQuery(data, queryVec) 58 refData := multiShardRefClassData(r, data) 59 60 t.Run("import all individually", func(t *testing.T) { 61 for _, obj := range data { 62 require.Nil(t, repo.PutObject(context.Background(), obj, obj.Vector, nil, nil)) 63 } 64 }) 65 66 t.Run("nodes api", testNodesAPI(repo)) 67 68 t.Run("sorting objects", makeTestSortingClass(repo)) 69 70 t.Run("verify objects", makeTestRetrievingBaseClass(repo, data, queryVec, 71 groundTruth)) 72 73 t.Run("import refs individually", func(t *testing.T) { 74 for _, obj := range refData { 75 require.Nil(t, repo.PutObject(context.Background(), obj, obj.Vector, nil, nil)) 76 } 77 }) 78 79 t.Run("verify refs", makeTestRetrieveRefClass(repo, data, refData)) 80 81 t.Run("batch delete", makeTestBatchDeleteAllObjects(repo)) 82 } 83 84 func Test_MultiShardJourneys_BatchedImports(t *testing.T) { 85 r := getRandomSeed() 86 repo, logger := setupMultiShardTest(t) 87 defer func() { 88 repo.Shutdown(context.Background()) 89 }() 90 91 t.Run("prepare", makeTestMultiShardSchema(repo, logger, false, testClassesForImporting()...)) 92 93 data := multiShardTestData(r) 94 queryVec := exampleQueryVec(r) 95 groundTruth := bruteForceObjectsByQuery(data, queryVec) 96 refData := multiShardRefClassData(r, data) 97 98 t.Run("import in a batch", func(t *testing.T) { 99 batch := make(objects.BatchObjects, len(data)) 100 for i, obj := range data { 101 batch[i] = objects.BatchObject{ 102 OriginalIndex: i, 103 Object: obj, 104 UUID: obj.ID, 105 } 106 } 107 108 _, err := repo.BatchPutObjects(context.Background(), batch, nil) 109 require.Nil(t, err) 110 }) 111 112 t.Run("nodes api", testNodesAPI(repo)) 113 114 t.Run("verify objects", makeTestRetrievingBaseClass(repo, data, queryVec, 115 groundTruth)) 116 117 t.Run("import refs in large batch", func(t *testing.T) { 118 // first strip the refs from the objects, so we can import them in a second 119 // step as batch ref 120 121 for _, obj := range refData { 122 withoutRef := &models.Object{ 123 ID: obj.ID, 124 Class: obj.Class, 125 Vector: obj.Vector, 126 Properties: map[string]interface{}{}, // empty so we remove the ref 127 } 128 129 require.Nil(t, repo.PutObject(context.Background(), withoutRef, withoutRef.Vector, nil, nil)) 130 } 131 132 index := 0 133 refBatch := make(objects.BatchReferences, len(refData)*len(data)) 134 for _, obj := range refData { 135 for _, ref := range obj.Properties.(map[string]interface{})["toOther"].(models.MultipleRef) { 136 to, _ := crossref.ParseSingleRef(ref) 137 refBatch[index] = objects.BatchReference{ 138 OriginalIndex: index, 139 To: to, 140 From: crossref.NewSource(schema.ClassName(obj.Class), "toOther", obj.ID), 141 } 142 index++ 143 } 144 } 145 146 _, err := repo.AddBatchReferences(context.Background(), refBatch, nil) 147 require.Nil(t, err) 148 }) 149 150 t.Run("verify refs", makeTestRetrieveRefClass(repo, data, refData)) 151 152 t.Run("batch delete", makeTestBatchDeleteAllObjects(repo)) 153 } 154 155 func Test_MultiShardJourneys_BM25_Search(t *testing.T) { 156 repo, logger := setupMultiShardTest(t) 157 defer func() { 158 repo.Shutdown(context.Background()) 159 }() 160 161 className := "RacecarPosts" 162 163 t.Run("prepare", func(t *testing.T) { 164 class := &models.Class{ 165 Class: className, 166 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 167 InvertedIndexConfig: &models.InvertedIndexConfig{ 168 CleanupIntervalSeconds: 60, 169 }, 170 Properties: []*models.Property{ 171 { 172 Name: "contents", 173 DataType: schema.DataTypeText.PropString(), 174 Tokenization: models.PropertyTokenizationWord, 175 }, 176 { 177 Name: "stringProp", 178 DataType: schema.DataTypeText.PropString(), 179 Tokenization: models.PropertyTokenizationWhitespace, 180 }, 181 { 182 Name: "textArrayProp", 183 DataType: []string{string(schema.DataTypeTextArray)}, 184 }, 185 }, 186 } 187 188 t.Run("prepare", makeTestMultiShardSchema(repo, logger, true, class)) 189 }) 190 191 t.Run("insert search data", func(t *testing.T) { 192 objs := objects.BatchObjects{ 193 { 194 UUID: "c39751ed-ddc2-4c9f-a45b-8b5732ddde56", 195 Object: &models.Object{ 196 ID: "c39751ed-ddc2-4c9f-a45b-8b5732ddde56", 197 Class: className, 198 Properties: map[string]interface{}{ 199 "contents": "Team Lotus was a domineering force in the early 90s", 200 }, 201 }, 202 }, 203 { 204 UUID: "5d034311-06e1-476e-b446-1306db91d906", 205 Object: &models.Object{ 206 ID: "5d034311-06e1-476e-b446-1306db91d906", 207 Class: className, 208 Properties: map[string]interface{}{ 209 "contents": "When a car becomes unserviceable, the driver must retire early from the race", 210 }, 211 }, 212 }, 213 { 214 UUID: "01989a8c-e37f-471d-89ca-9a787dbbf5f2", 215 Object: &models.Object{ 216 ID: "01989a8c-e37f-471d-89ca-9a787dbbf5f2", 217 Class: className, 218 Properties: map[string]interface{}{ 219 "contents": "A young driver is better than an old driver", 220 }, 221 }, 222 }, 223 { 224 UUID: "392614c5-4ca4-4630-a014-61fe868a20fd", 225 Object: &models.Object{ 226 ID: "392614c5-4ca4-4630-a014-61fe868a20fd", 227 Class: className, 228 Properties: map[string]interface{}{ 229 "contents": "an old driver doesn't retire early", 230 }, 231 }, 232 }, 233 } 234 235 _, err := repo.BatchPutObjects(context.Background(), objs, nil) 236 require.Nil(t, err) 237 }) 238 239 t.Run("ranked keyword search", func(t *testing.T) { 240 type testcase struct { 241 expectedResults []string 242 rankingParams *searchparams.KeywordRanking 243 } 244 245 tests := []testcase{ 246 { 247 rankingParams: &searchparams.KeywordRanking{ 248 Query: "driver", 249 Properties: []string{"contents"}, 250 }, 251 expectedResults: []string{ 252 "01989a8c-e37f-471d-89ca-9a787dbbf5f2", 253 "392614c5-4ca4-4630-a014-61fe868a20fd", 254 "5d034311-06e1-476e-b446-1306db91d906", 255 }, 256 }, 257 } 258 259 for _, test := range tests { 260 res, err := repo.Search(context.Background(), dto.GetParams{ 261 ClassName: className, 262 Pagination: &filters.Pagination{Limit: 10}, 263 KeywordRanking: test.rankingParams, 264 }) 265 require.Nil(t, err) 266 require.Equal(t, len(test.expectedResults), len(res)) 267 for i := range res { 268 assert.Equal(t, test.expectedResults[i], res[i].ID.String()) 269 } 270 t.Logf("res: %+v", res) 271 } 272 }) 273 } 274 275 func setupMultiShardTest(t *testing.T) (*DB, *logrus.Logger) { 276 dirName := t.TempDir() 277 278 logger, _ := test.NewNullLogger() 279 repo, err := New(logger, Config{ 280 ServerVersion: "server-version", 281 GitHash: "git-hash", 282 MemtablesFlushDirtyAfter: 60, 283 RootPath: dirName, 284 QueryMaximumResults: 10000, 285 MaxImportGoroutinesFactor: 1, 286 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 287 require.Nil(t, err) 288 return repo, logger 289 } 290 291 func makeTestMultiShardSchema(repo *DB, logger logrus.FieldLogger, fixedShardState bool, classes ...*models.Class) func(t *testing.T) { 292 return func(t *testing.T) { 293 var shardState *sharding.State 294 if fixedShardState { 295 shardState = fixedMultiShardState() 296 } else { 297 shardState = multiShardState() 298 } 299 schemaGetter := &fakeSchemaGetter{ 300 schema: schema.Schema{Objects: &models.Schema{Classes: nil}}, 301 shardState: shardState, 302 } 303 repo.SetSchemaGetter(schemaGetter) 304 err := repo.WaitForStartup(testCtx()) 305 require.Nil(t, err) 306 migrator := NewMigrator(repo, logger) 307 308 t.Run("creating the class", func(t *testing.T) { 309 for _, class := range classes { 310 require.Nil(t, migrator.AddClass(context.Background(), class, schemaGetter.shardState)) 311 } 312 }) 313 314 // update schema getter so it's in sync with class 315 schemaGetter.schema = schema.Schema{ 316 Objects: &models.Schema{ 317 Classes: classes, 318 }, 319 } 320 } 321 } 322 323 func makeTestRetrievingBaseClass(repo *DB, data []*models.Object, 324 queryVec []float32, groundTruth []*models.Object, 325 ) func(t *testing.T) { 326 return func(t *testing.T) { 327 t.Run("retrieve all individually", func(t *testing.T) { 328 for _, desired := range data { 329 res, err := repo.ObjectByID(context.Background(), desired.ID, search.SelectProperties{}, additional.Properties{}, "") 330 assert.Nil(t, err) 331 332 require.NotNil(t, res) 333 assert.Equal(t, desired.Properties.(map[string]interface{})["boolProp"].(bool), 334 res.Object().Properties.(map[string]interface{})["boolProp"].(bool)) 335 assert.Equal(t, desired.ID, res.Object().ID) 336 } 337 }) 338 339 t.Run("retrieve through filter (object search)", func(t *testing.T) { 340 do := func(limit, expected int) { 341 filters := &filters.LocalFilter{ 342 Root: &filters.Clause{ 343 Operator: filters.OperatorEqual, 344 Value: &filters.Value{ 345 Value: true, 346 Type: schema.DataTypeBoolean, 347 }, 348 On: &filters.Path{ 349 Property: "boolProp", 350 }, 351 }, 352 } 353 res, err := repo.ObjectSearch(context.Background(), 0, limit, filters, nil, 354 additional.Properties{}, "") 355 assert.Nil(t, err) 356 357 assert.Len(t, res, expected) 358 for _, obj := range res { 359 assert.Equal(t, true, obj.Schema.(map[string]interface{})["boolProp"].(bool)) 360 } 361 } 362 363 t.Run("with high limit", func(t *testing.T) { 364 do(100, 10) 365 }) 366 367 t.Run("with low limit", func(t *testing.T) { 368 do(3, 3) 369 }) 370 }) 371 372 t.Run("retrieve through filter (class search)", func(t *testing.T) { 373 do := func(limit, expected int) { 374 filter := &filters.LocalFilter{ 375 Root: &filters.Clause{ 376 Operator: filters.OperatorEqual, 377 Value: &filters.Value{ 378 Value: true, 379 Type: schema.DataTypeBoolean, 380 }, 381 On: &filters.Path{ 382 Property: "boolProp", 383 }, 384 }, 385 } 386 res, err := repo.Search(context.Background(), dto.GetParams{ 387 Filters: filter, 388 Pagination: &filters.Pagination{ 389 Limit: limit, 390 }, 391 ClassName: "TestClass", 392 }) 393 assert.Nil(t, err) 394 395 assert.Len(t, res, expected) 396 for _, obj := range res { 397 assert.Equal(t, true, obj.Schema.(map[string]interface{})["boolProp"].(bool)) 398 } 399 } 400 401 t.Run("with high limit", func(t *testing.T) { 402 do(100, 10) 403 }) 404 405 t.Run("with low limit", func(t *testing.T) { 406 do(3, 3) 407 }) 408 }) 409 410 t.Run("retrieve through class-level vector search", func(t *testing.T) { 411 do := func(t *testing.T, limit, expected int) { 412 res, err := repo.VectorSearch(context.Background(), dto.GetParams{ 413 SearchVector: queryVec, 414 Pagination: &filters.Pagination{ 415 Limit: limit, 416 }, 417 ClassName: "TestClass", 418 }) 419 assert.Nil(t, err) 420 assert.Len(t, res, expected) 421 for i, obj := range res { 422 assert.Equal(t, groundTruth[i].ID, obj.ID) 423 } 424 } 425 426 t.Run("with high limit", func(t *testing.T) { 427 do(t, 100, 20) 428 }) 429 430 t.Run("with low limit", func(t *testing.T) { 431 do(t, 3, 3) 432 }) 433 }) 434 435 t.Run("retrieve through inter-class vector search", func(t *testing.T) { 436 do := func(t *testing.T, limit, expected int) { 437 res, err := repo.CrossClassVectorSearch(context.Background(), queryVec, "", 0, limit, nil) 438 assert.Nil(t, err) 439 assert.Len(t, res, expected) 440 for i, obj := range res { 441 assert.Equal(t, groundTruth[i].ID, obj.ID) 442 } 443 } 444 445 t.Run("with high limit", func(t *testing.T) { 446 do(t, 100, 20) 447 }) 448 449 t.Run("with low limit", func(t *testing.T) { 450 do(t, 3, 3) 451 }) 452 }) 453 } 454 } 455 456 func makeTestRetrieveRefClass(repo *DB, data, refData []*models.Object) func(t *testing.T) { 457 return func(t *testing.T) { 458 t.Run("retrieve ref data individually with select props", func(t *testing.T) { 459 for _, desired := range refData { 460 res, err := repo.ObjectByID(context.Background(), desired.ID, search.SelectProperties{ 461 search.SelectProperty{ 462 IsPrimitive: false, 463 Name: "toOther", 464 Refs: []search.SelectClass{{ 465 ClassName: "TestClass", 466 RefProperties: search.SelectProperties{{ 467 Name: "index", 468 IsPrimitive: true, 469 }}, 470 }}, 471 }, 472 }, additional.Properties{}, "") 473 assert.Nil(t, err) 474 refs := res.Schema.(map[string]interface{})["toOther"].([]interface{}) 475 assert.Len(t, refs, len(data)) 476 for i, ref := range refs { 477 indexField := ref.(search.LocalRef).Fields["index"].(float64) 478 assert.Equal(t, i, int(indexField)) 479 } 480 } 481 }) 482 } 483 } 484 485 func makeTestSortingClass(repo *DB) func(t *testing.T) { 486 return func(t *testing.T) { 487 t.Run("sort by property", func(t *testing.T) { 488 getIndex := func(res search.Result) float64 { 489 if prop := res.Object().Properties.(map[string]interface{})["index"]; prop != nil { 490 return prop.(float64) 491 } 492 return -1 493 } 494 getBoolProp := func(res search.Result) bool { 495 if prop := res.Object().Properties.(map[string]interface{})["boolProp"]; prop != nil { 496 return prop.(bool) 497 } 498 return false 499 } 500 getStringProp := func(res search.Result) string { 501 if prop := res.Object().Properties.(map[string]interface{})["stringProp"]; prop != nil { 502 return prop.(string) 503 } 504 return "" 505 } 506 getTextArrayProp := func(res search.Result) []string { 507 if prop := res.Object().Properties.(map[string]interface{})["textArrayProp"]; prop != nil { 508 return prop.([]string) 509 } 510 return nil 511 } 512 type test struct { 513 name string 514 sort []filters.Sort 515 expectedIndexes []float64 516 expectedBoolProps []bool 517 expectedStringProps []string 518 expectedTextArrayProps [][]string 519 constainsErrorMsgs []string 520 } 521 tests := []test{ 522 { 523 name: "indexProp desc", 524 sort: []filters.Sort{{Path: []string{"indexProp"}, Order: "desc"}}, 525 expectedIndexes: []float64{19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, 526 }, 527 { 528 name: "indexProp asc", 529 sort: []filters.Sort{{Path: []string{"indexProp"}, Order: "asc"}}, 530 expectedIndexes: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, 531 }, 532 { 533 name: "stringProp desc", 534 sort: []filters.Sort{{Path: []string{"stringProp"}, Order: "desc"}}, 535 expectedStringProps: []string{"s19", "s18", "s17", "s16", "s15", "s14", "s13", "s12", "s11", "s10", "s09", "s08", "s07", "s06", "s05", "s04", "s03", "s02", "s01", "s00"}, 536 }, 537 { 538 name: "stringProp asc", 539 sort: []filters.Sort{{Path: []string{"stringProp"}, Order: "asc"}}, 540 expectedStringProps: []string{"s00", "s01", "s02", "s03", "s04", "s05", "s06", "s07", "s08", "s09", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19"}, 541 }, 542 { 543 name: "textArrayProp desc", 544 sort: []filters.Sort{{Path: []string{"textArrayProp"}, Order: "desc"}}, 545 expectedTextArrayProps: [][]string{{"s19", "19"}, {"s18", "18"}, {"s17", "17"}, {"s16", "16"}, {"s15", "15"}, {"s14", "14"}, {"s13", "13"}, {"s12", "12"}, {"s11", "11"}, {"s10", "10"}, {"s09", "09"}, {"s08", "08"}, {"s07", "07"}, {"s06", "06"}, {"s05", "05"}, {"s04", "04"}, {"s03", "03"}, {"s02", "02"}, {"s01", "01"}, {"s00", "00"}}, 546 }, 547 { 548 name: "textArrayProp asc", 549 sort: []filters.Sort{{Path: []string{"textArrayProp"}, Order: "asc"}}, 550 expectedTextArrayProps: [][]string{{"s00", "00"}, {"s01", "01"}, {"s02", "02"}, {"s03", "03"}, {"s04", "04"}, {"s05", "05"}, {"s06", "06"}, {"s07", "07"}, {"s08", "08"}, {"s09", "09"}, {"s10", "10"}, {"s11", "11"}, {"s12", "12"}, {"s13", "13"}, {"s14", "14"}, {"s15", "15"}, {"s16", "16"}, {"s17", "17"}, {"s18", "18"}, {"s19", "19"}}, 551 }, 552 { 553 name: "boolProp desc", 554 sort: []filters.Sort{{Path: []string{"boolProp"}, Order: "desc"}}, 555 expectedBoolProps: []bool{true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false}, 556 }, 557 { 558 name: "boolProp asc", 559 sort: []filters.Sort{{Path: []string{"boolProp"}, Order: "asc"}}, 560 expectedBoolProps: []bool{false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true}, 561 }, 562 { 563 name: "boolProp asc stringProp asc", 564 sort: []filters.Sort{{Path: []string{"boolProp"}, Order: "asc"}, {Path: []string{"stringProp"}, Order: "asc"}}, 565 expectedBoolProps: []bool{false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true}, 566 expectedStringProps: []string{"s01", "s03", "s05", "s07", "s09", "s11", "s13", "s15", "s17", "s19", "s00", "s02", "s04", "s06", "s08", "s10", "s12", "s14", "s16", "s18"}, 567 }, 568 { 569 name: "boolProp desc stringProp asc", 570 sort: []filters.Sort{{Path: []string{"boolProp"}, Order: "desc"}, {Path: []string{"stringProp"}, Order: "asc"}}, 571 expectedBoolProps: []bool{true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false}, 572 expectedStringProps: []string{"s00", "s02", "s04", "s06", "s08", "s10", "s12", "s14", "s16", "s18", "s01", "s03", "s05", "s07", "s09", "s11", "s13", "s15", "s17", "s19"}, 573 }, 574 { 575 name: "boolProp asc indexProp asc", 576 sort: []filters.Sort{{Path: []string{"boolProp"}, Order: "asc"}, {Path: []string{"indexProp"}, Order: "asc"}}, 577 expectedBoolProps: []bool{false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true}, 578 expectedIndexes: []float64{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18}, 579 }, 580 { 581 name: "boolProp asc indexProp desc", 582 sort: []filters.Sort{{Path: []string{"boolProp"}, Order: "asc"}, {Path: []string{"indexProp"}, Order: "desc"}}, 583 expectedBoolProps: []bool{false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true}, 584 expectedIndexes: []float64{19, 17, 15, 13, 11, 9, 7, 5, 3, 1, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0}, 585 }, 586 { 587 name: "index property doesn't exist in testrefclass", 588 sort: []filters.Sort{{Path: []string{"index"}, Order: "desc"}}, 589 expectedIndexes: nil, 590 constainsErrorMsgs: []string{ 591 "no such prop with name 'index' found in class 'TestRefClass' in the schema. " + 592 "Check your schema files for which properties in this class are available", 593 }, 594 }, 595 { 596 name: "non existent property in all classes", 597 sort: []filters.Sort{{Path: []string{"nonexistentproperty"}, Order: "desc"}}, 598 expectedIndexes: nil, 599 constainsErrorMsgs: []string{ 600 "no such prop with name 'nonexistentproperty' found in class 'TestClass' in the schema. " + 601 "Check your schema files for which properties in this class are available", 602 "no such prop with name 'nonexistentproperty' found in class 'TestRefClass' in the schema. " + 603 "Check your schema files for which properties in this class are available", 604 }, 605 }, 606 } 607 for _, test := range tests { 608 t.Run(test.name, func(t *testing.T) { 609 res, err := repo.ObjectSearch(context.Background(), 0, 1000, nil, test.sort, 610 additional.Properties{}, "") 611 if len(test.constainsErrorMsgs) > 0 { 612 require.NotNil(t, err) 613 for _, errorMsg := range test.constainsErrorMsgs { 614 assert.Contains(t, err.Error(), errorMsg) 615 } 616 } else { 617 require.Nil(t, err) 618 if len(test.expectedIndexes) > 0 { 619 for i := range res { 620 assert.Equal(t, test.expectedIndexes[i], getIndex(res[i])) 621 } 622 } 623 if len(test.expectedBoolProps) > 0 { 624 for i := range res { 625 assert.Equal(t, test.expectedBoolProps[i], getBoolProp(res[i])) 626 } 627 } 628 if len(test.expectedStringProps) > 0 { 629 for i := range res { 630 assert.Equal(t, test.expectedStringProps[i], getStringProp(res[i])) 631 } 632 } 633 if len(test.expectedTextArrayProps) > 0 { 634 for i := range res { 635 assert.EqualValues(t, test.expectedTextArrayProps[i], getTextArrayProp(res[i])) 636 } 637 } 638 } 639 }) 640 } 641 }) 642 } 643 } 644 645 func testNodesAPI(repo *DB) func(t *testing.T) { 646 return func(t *testing.T) { 647 nodeStatues, err := repo.GetNodeStatus(context.Background(), "", verbosity.OutputVerbose) 648 require.Nil(t, err) 649 require.NotNil(t, nodeStatues) 650 651 require.Len(t, nodeStatues, 1) 652 nodeStatus := nodeStatues[0] 653 assert.NotNil(t, nodeStatus) 654 assert.Equal(t, "node1", nodeStatus.Name) 655 assert.Equal(t, "server-version", nodeStatus.Version) 656 assert.Equal(t, "git-hash", nodeStatus.GitHash) 657 assert.Len(t, nodeStatus.Shards, 6) 658 var testClassShardsCount, testClassObjectsCount int64 659 var testRefClassShardsCount, testRefClassObjectsCount int64 660 for _, status := range nodeStatus.Shards { 661 if status.Class == "TestClass" { 662 testClassShardsCount += 1 663 testClassObjectsCount += status.ObjectCount 664 } 665 if status.Class == "TestRefClass" { 666 testRefClassShardsCount += 1 667 testRefClassObjectsCount += status.ObjectCount 668 } 669 } 670 assert.Equal(t, int64(3), testClassShardsCount) 671 // a previous version of this test made assertions on object counts, 672 // however with object count becoming async, we can no longer make exact 673 // assertions here. See https://github.com/weaviate/weaviate/issues/4193 674 // for details. 675 assert.Equal(t, int64(3), testRefClassShardsCount) 676 assert.Equal(t, int64(6), nodeStatus.Stats.ShardCount) 677 } 678 } 679 680 func makeTestBatchDeleteAllObjects(repo *DB) func(t *testing.T) { 681 return func(t *testing.T) { 682 performDelete := func(t *testing.T, className string) { 683 getParams := func(className string, dryRun bool) objects.BatchDeleteParams { 684 return objects.BatchDeleteParams{ 685 ClassName: schema.ClassName(className), 686 Filters: &filters.LocalFilter{ 687 Root: &filters.Clause{ 688 Operator: filters.OperatorLike, 689 Value: &filters.Value{ 690 Value: "*", 691 Type: schema.DataTypeText, 692 }, 693 On: &filters.Path{ 694 Property: "id", 695 }, 696 }, 697 }, 698 DryRun: dryRun, 699 Output: "verbose", 700 } 701 } 702 performClassSearch := func(className string) ([]search.Result, error) { 703 return repo.Search(context.Background(), dto.GetParams{ 704 ClassName: className, 705 Pagination: &filters.Pagination{Limit: 10000}, 706 }) 707 } 708 // get the initial count of the objects 709 res, err := performClassSearch(className) 710 require.Nil(t, err) 711 beforeDelete := len(res) 712 require.True(t, beforeDelete > 0) 713 // dryRun == true 714 batchDeleteRes, err := repo.BatchDeleteObjects(context.Background(), getParams(className, true), nil, "") 715 require.Nil(t, err) 716 require.Equal(t, int64(beforeDelete), batchDeleteRes.Matches) 717 require.Equal(t, beforeDelete, len(batchDeleteRes.Objects)) 718 for _, batchRes := range batchDeleteRes.Objects { 719 require.Nil(t, batchRes.Err) 720 } 721 // check that every object is preserved (not deleted) 722 res, err = performClassSearch(className) 723 require.Nil(t, err) 724 require.Equal(t, beforeDelete, len(res)) 725 // dryRun == false, perform actual delete 726 batchDeleteRes, err = repo.BatchDeleteObjects(context.Background(), getParams(className, false), nil, "") 727 require.Nil(t, err) 728 require.Equal(t, int64(beforeDelete), batchDeleteRes.Matches) 729 require.Equal(t, beforeDelete, len(batchDeleteRes.Objects)) 730 for _, batchRes := range batchDeleteRes.Objects { 731 require.Nil(t, batchRes.Err) 732 } 733 // check that every object is deleted 734 res, err = performClassSearch(className) 735 require.Nil(t, err) 736 require.Equal(t, 0, len(res)) 737 } 738 t.Run("batch delete TestRefClass", func(t *testing.T) { 739 performDelete(t, "TestRefClass") 740 }) 741 t.Run("batch delete TestClass", func(t *testing.T) { 742 performDelete(t, "TestClass") 743 }) 744 } 745 } 746 747 func exampleQueryVec(r *rand.Rand) []float32 { 748 dim := 10 749 vec := make([]float32, dim) 750 for j := range vec { 751 vec[j] = r.Float32() 752 } 753 return vec 754 } 755 756 func multiShardTestData(r *rand.Rand) []*models.Object { 757 size := 20 758 dim := 10 759 out := make([]*models.Object, size) 760 for i := range out { 761 vec := make([]float32, dim) 762 for j := range vec { 763 vec[j] = r.Float32() 764 } 765 766 out[i] = &models.Object{ 767 ID: strfmt.UUID(uuid.New().String()), 768 Class: "TestClass", 769 Vector: vec, 770 Properties: map[string]interface{}{ 771 "boolProp": i%2 == 0, 772 "index": i, 773 "indexProp": i, 774 "stringProp": fmt.Sprintf("s%02d", i), 775 "textArrayProp": []string{fmt.Sprintf("s%02d", i), fmt.Sprintf("%02d", i)}, 776 }, 777 } 778 } 779 780 return out 781 } 782 783 func multiShardRefClassData(r *rand.Rand, targets []*models.Object) []*models.Object { 784 // each class will link to all possible targets, so that we can be sure that 785 // we hit cross-shard links 786 targetLinks := make(models.MultipleRef, len(targets)) 787 for i, obj := range targets { 788 targetLinks[i] = &models.SingleRef{ 789 Beacon: strfmt.URI(crossref.NewLocalhost("", obj.ID).String()), 790 } 791 } 792 793 size := 20 794 dim := 10 795 out := make([]*models.Object, size) 796 for i := range out { 797 vec := make([]float32, dim) 798 for j := range vec { 799 vec[j] = r.Float32() 800 } 801 802 out[i] = &models.Object{ 803 ID: strfmt.UUID(uuid.New().String()), 804 Class: "TestRefClass", 805 Vector: vec, 806 Properties: map[string]interface{}{ 807 "toOther": targetLinks, 808 }, 809 } 810 } 811 812 return out 813 } 814 815 func bruteForceObjectsByQuery(objs []*models.Object, 816 query []float32, 817 ) []*models.Object { 818 type distanceAndObj struct { 819 distance float32 820 obj *models.Object 821 } 822 823 distProv := distancer.NewDotProductProvider() 824 distances := make([]distanceAndObj, len(objs)) 825 826 for i := range objs { 827 dist, _, _ := distProv.SingleDist(normalize(query), normalize(objs[i].Vector)) 828 distances[i] = distanceAndObj{ 829 distance: dist, 830 obj: objs[i], 831 } 832 } 833 834 sort.Slice(distances, func(a, b int) bool { 835 return distances[a].distance < distances[b].distance 836 }) 837 838 out := make([]*models.Object, len(objs)) 839 for i := range out { 840 out[i] = distances[i].obj 841 } 842 843 return out 844 } 845 846 func testClassesForImporting() []*models.Class { 847 return []*models.Class{ 848 { 849 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 850 InvertedIndexConfig: invertedConfig(), 851 Class: "TestClass", 852 Properties: []*models.Property{ 853 { 854 Name: "boolProp", 855 DataType: []string{string(schema.DataTypeBoolean)}, 856 }, 857 { 858 Name: "index", 859 DataType: []string{string(schema.DataTypeInt)}, 860 }, 861 { 862 Name: "indexProp", 863 DataType: []string{string(schema.DataTypeInt)}, 864 }, 865 { 866 Name: "stringProp", 867 DataType: schema.DataTypeText.PropString(), 868 Tokenization: models.PropertyTokenizationWhitespace, 869 }, 870 { 871 Name: "textArrayProp", 872 DataType: []string{string(schema.DataTypeTextArray)}, 873 }, 874 }, 875 }, 876 { 877 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 878 InvertedIndexConfig: invertedConfig(), 879 Class: "TestRefClass", 880 Properties: []*models.Property{ 881 { 882 Name: "boolProp", 883 DataType: []string{string(schema.DataTypeBoolean)}, 884 }, 885 { 886 Name: "toOther", 887 DataType: []string{"TestClass"}, 888 }, 889 { 890 Name: "indexProp", 891 DataType: []string{string(schema.DataTypeInt)}, 892 }, 893 { 894 Name: "stringProp", 895 DataType: schema.DataTypeText.PropString(), 896 Tokenization: models.PropertyTokenizationWhitespace, 897 }, 898 { 899 Name: "textArrayProp", 900 DataType: []string{string(schema.DataTypeTextArray)}, 901 }, 902 }, 903 }, 904 } 905 } 906 907 func normalize(v []float32) []float32 { 908 var norm float32 909 for i := range v { 910 norm += v[i] * v[i] 911 } 912 913 norm = float32(math.Sqrt(float64(norm))) 914 for i := range v { 915 v[i] = v[i] / norm 916 } 917 918 return v 919 }