github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/merge_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 // +build integrationTest 14 15 package db 16 17 import ( 18 "context" 19 "fmt" 20 "testing" 21 "time" 22 23 "github.com/go-openapi/strfmt" 24 "github.com/google/uuid" 25 "github.com/sirupsen/logrus" 26 "github.com/stretchr/testify/assert" 27 "github.com/stretchr/testify/require" 28 "github.com/weaviate/weaviate/entities/additional" 29 "github.com/weaviate/weaviate/entities/dto" 30 "github.com/weaviate/weaviate/entities/filters" 31 "github.com/weaviate/weaviate/entities/models" 32 "github.com/weaviate/weaviate/entities/schema" 33 "github.com/weaviate/weaviate/entities/schema/crossref" 34 enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 35 "github.com/weaviate/weaviate/usecases/objects" 36 ) 37 38 func Test_MergingObjects(t *testing.T) { 39 dirName := t.TempDir() 40 41 logger := logrus.New() 42 schemaGetter := &fakeSchemaGetter{ 43 schema: schema.Schema{Objects: &models.Schema{Classes: nil}}, 44 shardState: singleShardState(), 45 } 46 repo, err := New(logger, Config{ 47 MemtablesFlushDirtyAfter: 60, 48 RootPath: dirName, 49 MaxImportGoroutinesFactor: 1, 50 TrackVectorDimensions: true, 51 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 52 require.Nil(t, err) 53 repo.SetSchemaGetter(schemaGetter) 54 require.Nil(t, repo.WaitForStartup(testCtx())) 55 defer repo.Shutdown(context.Background()) 56 migrator := NewMigrator(repo, logger) 57 58 sch := schema.Schema{ 59 Objects: &models.Schema{ 60 Classes: []*models.Class{ 61 { 62 Class: "MergeTestTarget", 63 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 64 InvertedIndexConfig: invertedConfig(), 65 Properties: []*models.Property{ 66 { 67 Name: "name", 68 DataType: schema.DataTypeText.PropString(), 69 Tokenization: models.PropertyTokenizationWhitespace, 70 }, 71 }, 72 }, 73 { 74 Class: "MergeTestSource", 75 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 76 InvertedIndexConfig: invertedConfig(), 77 Properties: []*models.Property{ // tries to have "one of each property type" 78 { 79 Name: "string", 80 DataType: schema.DataTypeText.PropString(), 81 Tokenization: models.PropertyTokenizationWhitespace, 82 }, 83 { 84 Name: "text", 85 DataType: []string{"text"}, 86 }, 87 { 88 Name: "number", 89 DataType: []string{"number"}, 90 }, 91 { 92 Name: "int", 93 DataType: []string{"int"}, 94 }, 95 { 96 Name: "date", 97 DataType: []string{"date"}, 98 }, 99 { 100 Name: "geo", 101 DataType: []string{"geoCoordinates"}, 102 }, 103 { 104 Name: "toTarget", 105 DataType: []string{"MergeTestTarget"}, 106 }, 107 }, 108 }, 109 { 110 Class: "MergeTestNoVector", 111 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 112 InvertedIndexConfig: invertedConfig(), 113 Properties: []*models.Property{ 114 { 115 Name: "foo", 116 DataType: schema.DataTypeText.PropString(), 117 Tokenization: models.PropertyTokenizationWhitespace, 118 }, 119 }, 120 }, 121 }, 122 }, 123 } 124 125 t.Run("add required classes", func(t *testing.T) { 126 for _, class := range sch.Objects.Classes { 127 t.Run(fmt.Sprintf("add %s", class.Class), func(t *testing.T) { 128 err := migrator.AddClass(context.Background(), class, schemaGetter.shardState) 129 require.Nil(t, err) 130 }) 131 } 132 }) 133 134 schemaGetter.schema = sch 135 136 target1 := strfmt.UUID("897be7cc-1ae1-4b40-89d9-d3ea98037638") 137 target2 := strfmt.UUID("5cc94aba-93e4-408a-ab19-3d803216a04e") 138 target3 := strfmt.UUID("81982705-8b1e-4228-b84c-911818d7ee85") 139 target4 := strfmt.UUID("7f69c263-17f4-4529-a54d-891a7c008ca4") 140 sourceID := strfmt.UUID("8738ddd5-a0ed-408d-a5d6-6f818fd56be6") 141 noVecID := strfmt.UUID("b4933761-88b2-4666-856d-298eb1ad0a59") 142 143 t.Run("add objects", func(t *testing.T) { 144 now := time.Now().UnixNano() / int64(time.Millisecond) 145 err := repo.PutObject(context.Background(), &models.Object{ 146 ID: sourceID, 147 Class: "MergeTestSource", 148 Properties: map[string]interface{}{ 149 "string": "only the string prop set", 150 }, 151 CreationTimeUnix: now, 152 LastUpdateTimeUnix: now, 153 }, []float32{0.5}, nil, nil) 154 require.Nil(t, err) 155 156 targetDimensionsBefore := GetDimensionsFromRepo(repo, "MergeTestTarget") 157 158 targets := []strfmt.UUID{target1, target2, target3, target4} 159 160 for i, target := range targets { 161 err = repo.PutObject(context.Background(), &models.Object{ 162 ID: target, 163 Class: "MergeTestTarget", 164 Properties: map[string]interface{}{ 165 "name": fmt.Sprintf("target item %d", i), 166 }, 167 }, []float32{0.5}, nil, nil) 168 require.Nil(t, err) 169 } 170 171 targetDimensionsAfter := GetDimensionsFromRepo(repo, "MergeTestTarget") 172 require.Equal(t, targetDimensionsBefore+4, targetDimensionsAfter) 173 174 err = repo.PutObject(context.Background(), &models.Object{ 175 ID: noVecID, 176 Class: "MergeTestNoVector", 177 Properties: map[string]interface{}{ 178 "foo": "bar", 179 }, 180 CreationTimeUnix: now, 181 LastUpdateTimeUnix: now, 182 }, nil, nil, nil) 183 require.Nil(t, err) 184 185 targetDimensionsAfterNoVec := GetDimensionsFromRepo(repo, "MergeTestTarget") 186 require.Equal(t, targetDimensionsAfter, targetDimensionsAfterNoVec) 187 }) 188 189 var lastUpdateTimeUnix int64 190 191 t.Run("fetch original object's update timestamp", func(t *testing.T) { 192 source, err := repo.ObjectByID(context.Background(), sourceID, nil, additional.Properties{ 193 LastUpdateTimeUnix: true, 194 }, "") 195 require.Nil(t, err) 196 197 lastUpdateTimeUnix = source.Object().LastUpdateTimeUnix 198 require.NotEmpty(t, lastUpdateTimeUnix) 199 }) 200 201 t.Run("merge other previously unset properties into it", func(t *testing.T) { 202 // give the lastUpdateTimeUnix time to be different. 203 // on some machines this may not be needed, but for 204 // faster processors, the difference is undetectable 205 time.Sleep(time.Millisecond) 206 207 md := objects.MergeDocument{ 208 Class: "MergeTestSource", 209 ID: sourceID, 210 PrimitiveSchema: map[string]interface{}{ 211 "number": 7.0, 212 "int": int64(9), 213 "geo": &models.GeoCoordinates{ 214 Latitude: ptFloat32(30.2), 215 Longitude: ptFloat32(60.2), 216 }, 217 "text": "some text", 218 }, 219 UpdateTime: time.Now().UnixNano() / int64(time.Millisecond), 220 } 221 222 err := repo.Merge(context.Background(), md, nil, "") 223 assert.Nil(t, err) 224 }) 225 226 t.Run("compare merge object's update time with original", func(t *testing.T) { 227 source, err := repo.ObjectByID(context.Background(), sourceID, nil, additional.Properties{ 228 LastUpdateTimeUnix: true, 229 }, "") 230 require.Nil(t, err) 231 232 assert.Greater(t, source.Object().LastUpdateTimeUnix, lastUpdateTimeUnix) 233 }) 234 235 t.Run("check that the object was successfully merged", func(t *testing.T) { 236 source, err := repo.ObjectByID(context.Background(), sourceID, nil, additional.Properties{}, "") 237 require.Nil(t, err) 238 239 sch := source.Object().Properties.(map[string]interface{}) 240 expectedSchema := map[string]interface{}{ 241 // from original 242 "string": "only the string prop set", 243 244 // from merge 245 "number": 7.0, 246 "int": float64(9), 247 "geo": &models.GeoCoordinates{ 248 Latitude: ptFloat32(30.2), 249 Longitude: ptFloat32(60.2), 250 }, 251 "text": "some text", 252 } 253 254 assert.Equal(t, expectedSchema, sch) 255 }) 256 257 t.Run("trying to merge from non-existing index", func(t *testing.T) { 258 md := objects.MergeDocument{ 259 Class: "WrongClass", 260 ID: sourceID, 261 PrimitiveSchema: map[string]interface{}{ 262 "number": 7.0, 263 }, 264 } 265 266 err := repo.Merge(context.Background(), md, nil, "") 267 assert.Equal(t, fmt.Errorf( 268 "merge from non-existing index for WrongClass"), err) 269 }) 270 t.Run("add a reference and replace one prop", func(t *testing.T) { 271 source, err := crossref.ParseSource(fmt.Sprintf( 272 "weaviate://localhost/MergeTestSource/%s/toTarget", sourceID)) 273 require.Nil(t, err) 274 targets := []strfmt.UUID{target1} 275 refs := make(objects.BatchReferences, len(targets)) 276 for i, target := range targets { 277 to, err := crossref.Parse(fmt.Sprintf("weaviate://localhost/%s", target)) 278 require.Nil(t, err) 279 refs[i] = objects.BatchReference{ 280 Err: nil, 281 From: source, 282 To: to, 283 } 284 } 285 md := objects.MergeDocument{ 286 Class: "MergeTestSource", 287 ID: sourceID, 288 PrimitiveSchema: map[string]interface{}{ 289 "string": "let's update the string prop", 290 }, 291 References: refs, 292 } 293 err = repo.Merge(context.Background(), md, nil, "") 294 assert.Nil(t, err) 295 }) 296 297 t.Run("check that the object was successfully merged", func(t *testing.T) { 298 source, err := repo.ObjectByID(context.Background(), sourceID, nil, additional.Properties{}, "") 299 require.Nil(t, err) 300 301 ref, err := crossref.Parse(fmt.Sprintf("weaviate://localhost/%s", target1)) 302 require.Nil(t, err) 303 304 sch := source.Object().Properties.(map[string]interface{}) 305 expectedSchema := map[string]interface{}{ 306 "string": "let's update the string prop", 307 "number": 7.0, 308 "int": float64(9), 309 "geo": &models.GeoCoordinates{ 310 Latitude: ptFloat32(30.2), 311 Longitude: ptFloat32(60.2), 312 }, 313 "text": "some text", 314 "toTarget": models.MultipleRef{ 315 ref.SingleRef(), 316 }, 317 } 318 319 assert.Equal(t, expectedSchema, sch) 320 }) 321 322 t.Run("add more references in rapid succession", func(t *testing.T) { 323 // this test case prevents a regression on gh-1016 324 source, err := crossref.ParseSource(fmt.Sprintf( 325 "weaviate://localhost/MergeTestSource/%s/toTarget", sourceID)) 326 require.Nil(t, err) 327 targets := []strfmt.UUID{target2, target3, target4} 328 refs := make(objects.BatchReferences, len(targets)) 329 for i, target := range targets { 330 to, err := crossref.Parse(fmt.Sprintf("weaviate://localhost/%s", target)) 331 require.Nil(t, err) 332 refs[i] = objects.BatchReference{ 333 Err: nil, 334 From: source, 335 To: to, 336 } 337 } 338 md := objects.MergeDocument{ 339 Class: "MergeTestSource", 340 ID: sourceID, 341 References: refs, 342 } 343 err = repo.Merge(context.Background(), md, nil, "") 344 assert.Nil(t, err) 345 }) 346 347 t.Run("check all references are now present", func(t *testing.T) { 348 source, err := repo.ObjectByID(context.Background(), sourceID, nil, additional.Properties{}, "") 349 require.Nil(t, err) 350 351 refs := source.Object().Properties.(map[string]interface{})["toTarget"] 352 refsSlice, ok := refs.(models.MultipleRef) 353 require.True(t, ok, fmt.Sprintf("toTarget must be models.MultipleRef, but got %#v", refs)) 354 355 foundBeacons := []string{} 356 for _, ref := range refsSlice { 357 foundBeacons = append(foundBeacons, ref.Beacon.String()) 358 } 359 expectedBeacons := []string{ 360 fmt.Sprintf("weaviate://localhost/%s", target1), 361 fmt.Sprintf("weaviate://localhost/%s", target2), 362 fmt.Sprintf("weaviate://localhost/%s", target3), 363 fmt.Sprintf("weaviate://localhost/%s", target4), 364 } 365 366 assert.ElementsMatch(t, foundBeacons, expectedBeacons) 367 }) 368 369 t.Run("merge object with no vector", func(t *testing.T) { 370 err = repo.Merge(context.Background(), objects.MergeDocument{ 371 Class: "MergeTestNoVector", 372 ID: noVecID, 373 PrimitiveSchema: map[string]interface{}{"foo": "baz"}, 374 }, nil, "") 375 require.Nil(t, err) 376 377 orig, err := repo.ObjectByID(context.Background(), noVecID, nil, additional.Properties{}, "") 378 require.Nil(t, err) 379 380 expectedSchema := map[string]interface{}{ 381 "foo": "baz", 382 "id": noVecID, 383 } 384 385 assert.Equal(t, expectedSchema, orig.Schema) 386 }) 387 } 388 389 // This prevents a regression on 390 // https://github.com/weaviate/weaviate/issues/2193 391 // 392 // Prior to the fix it was possible that a prop that was not touched during the 393 // merge (and therefore only loaded from disk) failed during the 394 // inverted-indexing for the new doc id. This was then hidden by the fact that 395 // error handling was broken inside the inverted.Analyzer. This test tries to 396 // make sure that every possible property type stays intact if untouched 397 // during a Merge operation 398 // 399 // To achieve this, every prop in this class exists twice, once with the prefix 400 // 'touched_' and once with 'untouched_'. In the initial insert both properties 401 // contain the same value, but then during the patch merge, the 'touched_' 402 // properties are updated to a different value while the 'untouched_' 403 // properties are left untouched. Then we try to retrieve the object through a 404 // filter matching each property. The 'untouched_' properties are matched with 405 // the original value, the 'touched_' props are matched with the updated ones 406 func Test_Merge_UntouchedPropsCorrectlyIndexed(t *testing.T) { 407 dirName := t.TempDir() 408 409 logger := logrus.New() 410 schemaGetter := &fakeSchemaGetter{ 411 schema: schema.Schema{Objects: &models.Schema{Classes: nil}}, 412 shardState: singleShardState(), 413 } 414 repo, err := New(logger, Config{ 415 MemtablesFlushDirtyAfter: 60, 416 RootPath: dirName, 417 MaxImportGoroutinesFactor: 1, 418 QueryMaximumResults: 10000, 419 TrackVectorDimensions: true, 420 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 421 require.Nil(t, err) 422 repo.SetSchemaGetter(schemaGetter) 423 require.Nil(t, repo.WaitForStartup(testCtx())) 424 defer repo.Shutdown(context.Background()) 425 migrator := NewMigrator(repo, logger) 426 hnswConfig := enthnsw.NewDefaultUserConfig() 427 hnswConfig.Skip = true 428 sch := schema.Schema{ 429 Objects: &models.Schema{ 430 Classes: []*models.Class{ 431 { 432 Class: "TestClass", 433 VectorIndexConfig: hnswConfig, 434 InvertedIndexConfig: invertedConfig(), 435 Properties: []*models.Property{ // tries to have "one of each property type" 436 { 437 Name: "untouched_string", 438 DataType: schema.DataTypeText.PropString(), 439 Tokenization: models.PropertyTokenizationWhitespace, 440 }, 441 { 442 Name: "touched_string", 443 DataType: schema.DataTypeText.PropString(), 444 Tokenization: models.PropertyTokenizationWhitespace, 445 }, 446 { 447 Name: "untouched_string_array", 448 DataType: schema.DataTypeTextArray.PropString(), 449 Tokenization: models.PropertyTokenizationWhitespace, 450 }, 451 { 452 Name: "touched_string_array", 453 DataType: schema.DataTypeTextArray.PropString(), 454 Tokenization: models.PropertyTokenizationWhitespace, 455 }, 456 { 457 Name: "untouched_text", Tokenization: "word", 458 DataType: []string{"text"}, 459 }, 460 { 461 Name: "touched_text", Tokenization: "word", 462 DataType: []string{"text"}, 463 }, 464 { 465 Name: "untouched_text_array", Tokenization: "word", 466 DataType: []string{"text[]"}, 467 }, 468 { 469 Name: "touched_text_array", Tokenization: "word", 470 DataType: []string{"text[]"}, 471 }, 472 {Name: "untouched_number", DataType: []string{"number"}}, 473 {Name: "touched_number", DataType: []string{"number"}}, 474 {Name: "untouched_number_array", DataType: []string{"number[]"}}, 475 {Name: "touched_number_array", DataType: []string{"number[]"}}, 476 {Name: "untouched_int", DataType: []string{"int"}}, 477 {Name: "touched_int", DataType: []string{"int"}}, 478 {Name: "untouched_int_array", DataType: []string{"int[]"}}, 479 {Name: "touched_int_array", DataType: []string{"int[]"}}, 480 {Name: "untouched_date", DataType: []string{"date"}}, 481 {Name: "touched_date", DataType: []string{"date"}}, 482 {Name: "untouched_date_array", DataType: []string{"date[]"}}, 483 {Name: "touched_date_array", DataType: []string{"date[]"}}, 484 {Name: "untouched_geo", DataType: []string{"geoCoordinates"}}, 485 {Name: "touched_geo", DataType: []string{"geoCoordinates"}}, 486 }, 487 }, 488 }, 489 }, 490 } 491 492 t.Run("add required classes", func(t *testing.T) { 493 for _, class := range sch.Objects.Classes { 494 t.Run(fmt.Sprintf("add %s", class.Class), func(t *testing.T) { 495 err := migrator.AddClass(context.Background(), class, schemaGetter.shardState) 496 require.Nil(t, err) 497 }) 498 } 499 }) 500 501 schemaGetter.schema = sch 502 503 t.Run("add initial object", func(t *testing.T) { 504 id := 0 505 err := repo.PutObject(context.Background(), &models.Object{ 506 ID: uuidFromInt(id), 507 Class: "TestClass", 508 Properties: map[string]interface{}{ 509 "untouched_number": float64(id), 510 "untouched_number_array": []interface{}{float64(id)}, 511 "untouched_int": id, 512 "untouched_int_array": []interface{}{int64(id)}, 513 "untouched_string": fmt.Sprintf("%d", id), 514 "untouched_string_array": []string{fmt.Sprintf("%d", id)}, 515 "untouched_text": fmt.Sprintf("%d", id), 516 "untouched_text_array": []string{fmt.Sprintf("%d", id)}, 517 "untouched_date": time.Unix(0, 0).Add(time.Duration(id) * time.Hour), 518 "untouched_date_array": []time.Time{time.Unix(0, 0).Add(time.Duration(id) * time.Hour)}, 519 "untouched_geo": &models.GeoCoordinates{ 520 ptFloat32(float32(id)), ptFloat32(float32(id)), 521 }, 522 523 "touched_number": float64(id), 524 "touched_number_array": []interface{}{float64(id)}, 525 "touched_int": id, 526 "touched_int_array": []interface{}{int64(id)}, 527 "touched_string": fmt.Sprintf("%d", id), 528 "touched_string_array": []string{fmt.Sprintf("%d", id)}, 529 "touched_text": fmt.Sprintf("%d", id), 530 "touched_text_array": []string{fmt.Sprintf("%d", id)}, 531 "touched_date": time.Unix(0, 0).Add(time.Duration(id) * time.Hour), 532 "touched_date_array": []time.Time{time.Unix(0, 0).Add(time.Duration(id) * time.Hour)}, 533 "touched_geo": &models.GeoCoordinates{ 534 ptFloat32(float32(id)), ptFloat32(float32(id)), 535 }, 536 }, 537 CreationTimeUnix: int64(id), 538 LastUpdateTimeUnix: int64(id), 539 }, []float32{0.5}, nil, nil) 540 require.Nil(t, err) 541 }) 542 543 t.Run("patch half the props (all that contain 'touched')", func(t *testing.T) { 544 updateID := 28 545 md := objects.MergeDocument{ 546 Class: "TestClass", 547 ID: uuidFromInt(0), 548 PrimitiveSchema: map[string]interface{}{ 549 "touched_number": float64(updateID), 550 "touched_number_array": []interface{}{float64(updateID)}, 551 "touched_int": updateID, 552 "touched_int_array": []interface{}{int64(updateID)}, 553 "touched_string": fmt.Sprintf("%d", updateID), 554 "touched_string_array": []string{fmt.Sprintf("%d", updateID)}, 555 "touched_text": fmt.Sprintf("%d", updateID), 556 "touched_text_array": []string{fmt.Sprintf("%d", updateID)}, 557 "touched_date": time.Unix(0, 0).Add(time.Duration(updateID) * time.Hour), 558 "touched_date_array": []time.Time{time.Unix(0, 0).Add(time.Duration(updateID) * time.Hour)}, 559 "touched_geo": &models.GeoCoordinates{ 560 ptFloat32(float32(updateID)), ptFloat32(float32(updateID)), 561 }, 562 }, 563 References: nil, 564 } 565 err = repo.Merge(context.Background(), md, nil, "") 566 assert.Nil(t, err) 567 }) 568 569 t.Run("retrieve by each individual prop", func(t *testing.T) { 570 retrieve := func(prefix string, id int) func(t *testing.T) { 571 return func(t *testing.T) { 572 type test struct { 573 name string 574 filter *filters.LocalFilter 575 } 576 577 tests := []test{ 578 { 579 name: "string filter", 580 filter: buildFilter( 581 fmt.Sprintf("%s_string", prefix), 582 fmt.Sprintf("%d", id), 583 eq, 584 schema.DataTypeText), 585 }, 586 { 587 name: "string array filter", 588 filter: buildFilter( 589 fmt.Sprintf("%s_string_array", prefix), 590 fmt.Sprintf("%d", id), 591 eq, 592 schema.DataTypeText), 593 }, 594 { 595 name: "text filter", 596 filter: buildFilter( 597 fmt.Sprintf("%s_text", prefix), 598 fmt.Sprintf("%d", id), 599 eq, 600 dtText), 601 }, 602 { 603 name: "text array filter", 604 filter: buildFilter( 605 fmt.Sprintf("%s_text_array", prefix), 606 fmt.Sprintf("%d", id), 607 eq, 608 dtText), 609 }, 610 { 611 name: "int filter", 612 filter: buildFilter( 613 fmt.Sprintf("%s_int", prefix), id, eq, dtInt), 614 }, 615 { 616 name: "int array filter", 617 filter: buildFilter( 618 fmt.Sprintf("%s_int_array", prefix), id, eq, dtInt), 619 }, 620 { 621 name: "number filter", 622 filter: buildFilter( 623 fmt.Sprintf("%s_number", prefix), float64(id), eq, dtNumber), 624 }, 625 { 626 name: "number array filter", 627 filter: buildFilter( 628 fmt.Sprintf("%s_number_array", prefix), float64(id), eq, dtNumber), 629 }, 630 { 631 name: "date filter", 632 filter: buildFilter( 633 fmt.Sprintf("%s_date", prefix), 634 time.Unix(0, 0).Add(time.Duration(id)*time.Hour), 635 eq, dtDate), 636 }, 637 { 638 name: "date array filter", 639 filter: buildFilter( 640 fmt.Sprintf("%s_date_array", prefix), 641 time.Unix(0, 0).Add(time.Duration(id)*time.Hour), 642 eq, dtDate), 643 }, 644 { 645 name: "geoFilter filter", 646 filter: buildFilter( 647 fmt.Sprintf("%s_geo", prefix), 648 filters.GeoRange{ 649 GeoCoordinates: &models.GeoCoordinates{ 650 ptFloat32(float32(id)), ptFloat32(float32(id)), 651 }, 652 Distance: 2, 653 }, 654 wgr, dtGeoCoordinates), 655 }, 656 } 657 658 for _, tc := range tests { 659 t.Run(tc.name, func(t *testing.T) { 660 params := dto.GetParams{ 661 ClassName: "TestClass", 662 Pagination: &filters.Pagination{Limit: 5}, 663 Filters: tc.filter, 664 } 665 res, err := repo.VectorSearch(context.Background(), params) 666 require.Nil(t, err) 667 require.Len(t, res, 1) 668 669 // hard-code the only uuid 670 assert.Equal(t, uuidFromInt(0), res[0].ID) 671 }) 672 } 673 } 674 } 675 t.Run("using untouched", retrieve("untouched", 0)) 676 t.Run("using touched", retrieve("touched", 28)) 677 }) 678 } 679 680 func Test_MergeDocIdPreserved_PropsCorrectlyIndexed(t *testing.T) { 681 dirName := t.TempDir() 682 683 logger := logrus.New() 684 schemaGetter := &fakeSchemaGetter{ 685 schema: schema.Schema{Objects: &models.Schema{Classes: nil}}, 686 shardState: singleShardState(), 687 } 688 repo, err := New(logger, Config{ 689 MemtablesFlushDirtyAfter: 60, 690 RootPath: dirName, 691 MaxImportGoroutinesFactor: 1, 692 QueryMaximumResults: 10000, 693 TrackVectorDimensions: true, 694 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 695 require.Nil(t, err) 696 repo.SetSchemaGetter(schemaGetter) 697 require.Nil(t, repo.WaitForStartup(testCtx())) 698 defer repo.Shutdown(context.Background()) 699 migrator := NewMigrator(repo, logger) 700 hnswConfig := enthnsw.NewDefaultUserConfig() 701 hnswConfig.Skip = true 702 sch := schema.Schema{ 703 Objects: &models.Schema{ 704 Classes: []*models.Class{ 705 { 706 Class: "TestClass", 707 VectorIndexConfig: hnswConfig, 708 InvertedIndexConfig: invertedConfig(), 709 Properties: []*models.Property{ // tries to have "one of each property type" 710 { 711 Name: "untouched_string", 712 DataType: schema.DataTypeText.PropString(), 713 Tokenization: models.PropertyTokenizationWhitespace, 714 }, 715 { 716 Name: "touched_string", 717 DataType: schema.DataTypeText.PropString(), 718 Tokenization: models.PropertyTokenizationWhitespace, 719 }, 720 { 721 Name: "untouched_string_array", 722 DataType: schema.DataTypeTextArray.PropString(), 723 Tokenization: models.PropertyTokenizationWhitespace, 724 }, 725 { 726 Name: "touched_string_array", 727 DataType: schema.DataTypeTextArray.PropString(), 728 Tokenization: models.PropertyTokenizationWhitespace, 729 }, 730 { 731 Name: "untouched_text", Tokenization: "word", 732 DataType: []string{"text"}, 733 }, 734 { 735 Name: "touched_text", Tokenization: "word", 736 DataType: []string{"text"}, 737 }, 738 { 739 Name: "untouched_text_array", Tokenization: "word", 740 DataType: []string{"text[]"}, 741 }, 742 { 743 Name: "touched_text_array", Tokenization: "word", 744 DataType: []string{"text[]"}, 745 }, 746 {Name: "untouched_number", DataType: []string{"number"}}, 747 {Name: "touched_number", DataType: []string{"number"}}, 748 {Name: "untouched_number_array", DataType: []string{"number[]"}}, 749 {Name: "touched_number_array", DataType: []string{"number[]"}}, 750 {Name: "untouched_int", DataType: []string{"int"}}, 751 {Name: "touched_int", DataType: []string{"int"}}, 752 {Name: "untouched_int_array", DataType: []string{"int[]"}}, 753 {Name: "touched_int_array", DataType: []string{"int[]"}}, 754 {Name: "untouched_date", DataType: []string{"date"}}, 755 {Name: "touched_date", DataType: []string{"date"}}, 756 {Name: "untouched_date_array", DataType: []string{"date[]"}}, 757 {Name: "touched_date_array", DataType: []string{"date[]"}}, 758 }, 759 }, 760 }, 761 }, 762 } 763 764 t.Run("add required classes", func(t *testing.T) { 765 for _, class := range sch.Objects.Classes { 766 t.Run(fmt.Sprintf("add %s", class.Class), func(t *testing.T) { 767 err := migrator.AddClass(context.Background(), class, schemaGetter.shardState) 768 require.Nil(t, err) 769 }) 770 } 771 }) 772 773 schemaGetter.schema = sch 774 775 t.Run("add initial object", func(t *testing.T) { 776 id := 0 777 err := repo.PutObject(context.Background(), &models.Object{ 778 ID: uuidFromInt(id), 779 Class: "TestClass", 780 Properties: map[string]interface{}{ 781 "untouched_number": float64(id), 782 "untouched_number_array": []interface{}{float64(id)}, 783 "untouched_int": id, 784 "untouched_int_array": []interface{}{int64(id)}, 785 "untouched_string": fmt.Sprintf("%d", id), 786 "untouched_string_array": []string{fmt.Sprintf("%d", id)}, 787 "untouched_text": fmt.Sprintf("%d", id), 788 "untouched_text_array": []string{fmt.Sprintf("%d", id)}, 789 "untouched_date": time.Unix(0, 0).Add(time.Duration(id) * time.Hour), 790 "untouched_date_array": []time.Time{time.Unix(0, 0).Add(time.Duration(id) * time.Hour)}, 791 792 "touched_number": float64(id), 793 "touched_number_array": []interface{}{float64(id)}, 794 "touched_int": id, 795 "touched_int_array": []interface{}{int64(id)}, 796 "touched_string": fmt.Sprintf("%d", id), 797 "touched_string_array": []string{fmt.Sprintf("%d", id)}, 798 "touched_text": fmt.Sprintf("%d", id), 799 "touched_text_array": []string{fmt.Sprintf("%d", id)}, 800 "touched_date": time.Unix(0, 0).Add(time.Duration(id) * time.Hour), 801 "touched_date_array": []time.Time{time.Unix(0, 0).Add(time.Duration(id) * time.Hour)}, 802 }, 803 CreationTimeUnix: int64(id), 804 LastUpdateTimeUnix: int64(id), 805 }, []float32{0.5}, nil, nil) 806 require.Nil(t, err) 807 }) 808 809 t.Run("patch half the props (all that contain 'touched')", func(t *testing.T) { 810 updateID := 28 811 md := objects.MergeDocument{ 812 Class: "TestClass", 813 ID: uuidFromInt(0), 814 PrimitiveSchema: map[string]interface{}{ 815 "touched_number": float64(updateID), 816 "touched_number_array": []interface{}{float64(updateID)}, 817 "touched_int": updateID, 818 "touched_int_array": []interface{}{int64(updateID)}, 819 "touched_string": fmt.Sprintf("%d", updateID), 820 "touched_string_array": []string{fmt.Sprintf("%d", updateID)}, 821 "touched_text": fmt.Sprintf("%d", updateID), 822 "touched_text_array": []string{fmt.Sprintf("%d", updateID)}, 823 "touched_date": time.Unix(0, 0).Add(time.Duration(updateID) * time.Hour), 824 "touched_date_array": []time.Time{time.Unix(0, 0).Add(time.Duration(updateID) * time.Hour)}, 825 }, 826 References: nil, 827 } 828 err = repo.Merge(context.Background(), md, nil, "") 829 assert.Nil(t, err) 830 }) 831 832 t.Run("retrieve by each individual prop", func(t *testing.T) { 833 retrieve := func(prefix string, id int) func(t *testing.T) { 834 return func(t *testing.T) { 835 type test struct { 836 name string 837 filter *filters.LocalFilter 838 } 839 840 tests := []test{ 841 { 842 name: "string filter", 843 filter: buildFilter( 844 fmt.Sprintf("%s_string", prefix), 845 fmt.Sprintf("%d", id), 846 eq, 847 schema.DataTypeText), 848 }, 849 { 850 name: "string array filter", 851 filter: buildFilter( 852 fmt.Sprintf("%s_string_array", prefix), 853 fmt.Sprintf("%d", id), 854 eq, 855 schema.DataTypeText), 856 }, 857 { 858 name: "text filter", 859 filter: buildFilter( 860 fmt.Sprintf("%s_text", prefix), 861 fmt.Sprintf("%d", id), 862 eq, 863 dtText), 864 }, 865 { 866 name: "text array filter", 867 filter: buildFilter( 868 fmt.Sprintf("%s_text_array", prefix), 869 fmt.Sprintf("%d", id), 870 eq, 871 dtText), 872 }, 873 { 874 name: "int filter", 875 filter: buildFilter( 876 fmt.Sprintf("%s_int", prefix), id, eq, dtInt), 877 }, 878 { 879 name: "int array filter", 880 filter: buildFilter( 881 fmt.Sprintf("%s_int_array", prefix), id, eq, dtInt), 882 }, 883 { 884 name: "number filter", 885 filter: buildFilter( 886 fmt.Sprintf("%s_number", prefix), float64(id), eq, dtNumber), 887 }, 888 { 889 name: "number array filter", 890 filter: buildFilter( 891 fmt.Sprintf("%s_number_array", prefix), float64(id), eq, dtNumber), 892 }, 893 { 894 name: "date filter", 895 filter: buildFilter( 896 fmt.Sprintf("%s_date", prefix), 897 time.Unix(0, 0).Add(time.Duration(id)*time.Hour), 898 eq, dtDate), 899 }, 900 { 901 name: "date array filter", 902 filter: buildFilter( 903 fmt.Sprintf("%s_date_array", prefix), 904 time.Unix(0, 0).Add(time.Duration(id)*time.Hour), 905 eq, dtDate), 906 }, 907 } 908 909 for _, tc := range tests { 910 t.Run(tc.name, func(t *testing.T) { 911 params := dto.GetParams{ 912 ClassName: "TestClass", 913 Pagination: &filters.Pagination{Limit: 5}, 914 Filters: tc.filter, 915 } 916 res, err := repo.VectorSearch(context.Background(), params) 917 require.Nil(t, err) 918 require.Len(t, res, 1) 919 920 // hard-code the only uuid 921 assert.Equal(t, uuidFromInt(0), res[0].ID) 922 }) 923 } 924 } 925 } 926 t.Run("using untouched", retrieve("untouched", 0)) 927 t.Run("using touched", retrieve("touched", 28)) 928 }) 929 } 930 931 func uuidFromInt(in int) strfmt.UUID { 932 return strfmt.UUID(uuid.MustParse(fmt.Sprintf("%032d", in)).String()) 933 }