github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted_index_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 14 package db 15 16 import ( 17 "context" 18 "fmt" 19 "testing" 20 "time" 21 22 "github.com/go-openapi/strfmt" 23 "github.com/sirupsen/logrus" 24 "github.com/stretchr/testify/assert" 25 "github.com/stretchr/testify/require" 26 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 27 "github.com/weaviate/weaviate/entities/dto" 28 "github.com/weaviate/weaviate/entities/filters" 29 "github.com/weaviate/weaviate/entities/models" 30 "github.com/weaviate/weaviate/entities/schema" 31 "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 32 enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 33 ) 34 35 func TestIndexByTimestampsNullStatePropLength_AddClass(t *testing.T) { 36 dirName := t.TempDir() 37 vFalse := false 38 vTrue := true 39 40 class := &models.Class{ 41 Class: "TestClass", 42 VectorIndexConfig: hnsw.NewDefaultUserConfig(), 43 InvertedIndexConfig: &models.InvertedIndexConfig{ 44 CleanupIntervalSeconds: 60, 45 Stopwords: &models.StopwordConfig{ 46 Preset: "none", 47 }, 48 IndexTimestamps: true, 49 IndexNullState: true, 50 IndexPropertyLength: true, 51 }, 52 Properties: []*models.Property{ 53 { 54 Name: "initialWithIINil", 55 DataType: schema.DataTypeText.PropString(), 56 Tokenization: models.PropertyTokenizationWhitespace, 57 }, 58 { 59 Name: "initialWithIITrue", 60 DataType: schema.DataTypeText.PropString(), 61 Tokenization: models.PropertyTokenizationWhitespace, 62 IndexFilterable: &vTrue, 63 IndexSearchable: &vTrue, 64 }, 65 { 66 Name: "initialWithoutII", 67 DataType: schema.DataTypeText.PropString(), 68 Tokenization: 
models.PropertyTokenizationWhitespace, 69 IndexFilterable: &vFalse, 70 IndexSearchable: &vFalse, 71 }, 72 }, 73 } 74 shardState := singleShardState() 75 logger := logrus.New() 76 schemaGetter := &fakeSchemaGetter{shardState: shardState, schema: schema.Schema{ 77 Objects: &models.Schema{ 78 Classes: []*models.Class{class}, 79 }, 80 }} 81 repo, err := New(logger, Config{ 82 MemtablesFlushDirtyAfter: 60, 83 RootPath: dirName, 84 QueryMaximumResults: 10000, 85 MaxImportGoroutinesFactor: 1, 86 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 87 require.Nil(t, err) 88 repo.SetSchemaGetter(schemaGetter) 89 require.Nil(t, repo.WaitForStartup(testCtx())) 90 defer repo.Shutdown(context.Background()) 91 92 migrator := NewMigrator(repo, logger) 93 require.Nil(t, migrator.AddClass(context.Background(), class, schemaGetter.shardState)) 94 95 require.Nil(t, migrator.AddProperty(context.Background(), class.Class, &models.Property{ 96 Name: "updateWithIINil", 97 DataType: schema.DataTypeText.PropString(), 98 Tokenization: models.PropertyTokenizationWhitespace, 99 })) 100 require.Nil(t, migrator.AddProperty(context.Background(), class.Class, &models.Property{ 101 Name: "updateWithIITrue", 102 DataType: schema.DataTypeText.PropString(), 103 Tokenization: models.PropertyTokenizationWhitespace, 104 IndexFilterable: &vTrue, 105 IndexSearchable: &vTrue, 106 })) 107 require.Nil(t, migrator.AddProperty(context.Background(), class.Class, &models.Property{ 108 Name: "updateWithoutII", 109 DataType: schema.DataTypeText.PropString(), 110 Tokenization: models.PropertyTokenizationWhitespace, 111 IndexFilterable: &vFalse, 112 IndexSearchable: &vFalse, 113 })) 114 115 t.Run("check for additional buckets", func(t *testing.T) { 116 for _, idx := range migrator.db.indices { 117 idx.ForEachShard(func(_ string, shd ShardLike) error { 118 createBucket := shd.Store().Bucket("property__creationTimeUnix") 119 assert.NotNil(t, createBucket) 120 121 
updateBucket := shd.Store().Bucket("property__lastUpdateTimeUnix") 122 assert.NotNil(t, updateBucket) 123 124 cases := []struct { 125 prop string 126 compareFunc func(t assert.TestingT, object interface{}, msgAndArgs ...interface{}) bool 127 }{ 128 {prop: "initialWithIINil", compareFunc: assert.NotNil}, 129 {prop: "initialWithIITrue", compareFunc: assert.NotNil}, 130 {prop: "initialWithoutII", compareFunc: assert.Nil}, 131 {prop: "updateWithIINil", compareFunc: assert.NotNil}, 132 {prop: "updateWithIITrue", compareFunc: assert.NotNil}, 133 {prop: "updateWithoutII", compareFunc: assert.Nil}, 134 } 135 for _, tt := range cases { 136 tt.compareFunc(t, shd.Store().Bucket("property_"+tt.prop+filters.InternalNullIndex)) 137 tt.compareFunc(t, shd.Store().Bucket("property_"+tt.prop+filters.InternalPropertyLength)) 138 } 139 return nil 140 }) 141 } 142 }) 143 144 t.Run("Add Objects", func(t *testing.T) { 145 testID1 := strfmt.UUID("a0b55b05-bc5b-4cc9-b646-1452d1390a62") 146 objWithProperty := &models.Object{ 147 ID: testID1, 148 Class: "TestClass", 149 Properties: map[string]interface{}{"initialWithIINil": "0", "initialWithIITrue": "0", "initialWithoutII": "1", "updateWithIINil": "2", "updateWithIITrue": "2", "updateWithoutII": "3"}, 150 } 151 vec := []float32{1, 2, 3} 152 require.Nil(t, repo.PutObject(context.Background(), objWithProperty, vec, nil, nil)) 153 154 testID2 := strfmt.UUID("a0b55b05-bc5b-4cc9-b646-1452d1390a63") 155 objWithoutProperty := &models.Object{ 156 ID: testID2, 157 Class: "TestClass", 158 Properties: map[string]interface{}{}, 159 } 160 require.Nil(t, repo.PutObject(context.Background(), objWithoutProperty, vec, nil, nil)) 161 162 testID3 := strfmt.UUID("a0b55b05-bc5b-4cc9-b646-1452d1390a64") 163 objWithNilProperty := &models.Object{ 164 ID: testID3, 165 Class: "TestClass", 166 Properties: map[string]interface{}{"initialWithIINil": nil, "initialWithIITrue": nil, "initialWithoutII": nil, "updateWithIINil": nil, "updateWithIITrue": nil, 
"updateWithoutII": nil}, 167 } 168 require.Nil(t, repo.PutObject(context.Background(), objWithNilProperty, vec, nil, nil)) 169 }) 170 171 t.Run("delete class", func(t *testing.T) { 172 require.Nil(t, migrator.DropClass(context.Background(), class.Class)) 173 for _, idx := range migrator.db.indices { 174 idx.ForEachShard(func(name string, shd ShardLike) error { 175 require.Nil(t, shd.Store().Bucket("property__creationTimeUnix")) 176 require.Nil(t, shd.Store().Bucket("property_name"+filters.InternalNullIndex)) 177 require.Nil(t, shd.Store().Bucket("property_name"+filters.InternalPropertyLength)) 178 return nil 179 }) 180 } 181 }) 182 } 183 184 func TestIndexNullState_GetClass(t *testing.T) { 185 dirName := t.TempDir() 186 187 testID1 := strfmt.UUID("a0b55b05-bc5b-4cc9-b646-1452d1390a62") 188 testID2 := strfmt.UUID("65be32cc-bb74-49c7-833e-afb14f957eae") 189 refID1 := strfmt.UUID("f2e42a9f-e0b5-46bd-8a9c-e70b6330622c") 190 refID2 := strfmt.UUID("92d5920c-1c20-49da-9cdc-b765813e175b") 191 192 var repo *DB 193 var schemaGetter *fakeSchemaGetter 194 195 t.Run("init repo", func(t *testing.T) { 196 schemaGetter = &fakeSchemaGetter{ 197 shardState: singleShardState(), 198 schema: schema.Schema{ 199 Objects: &models.Schema{}, 200 }, 201 } 202 var err error 203 repo, err = New(logrus.New(), Config{ 204 MemtablesFlushDirtyAfter: 60, 205 RootPath: dirName, 206 QueryMaximumResults: 10000, 207 MaxImportGoroutinesFactor: 1, 208 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 209 require.Nil(t, err) 210 repo.SetSchemaGetter(schemaGetter) 211 require.Nil(t, repo.WaitForStartup(testCtx())) 212 }) 213 214 defer repo.Shutdown(testCtx()) 215 216 t.Run("add classes", func(t *testing.T) { 217 class := &models.Class{ 218 Class: "TestClass", 219 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 220 InvertedIndexConfig: &models.InvertedIndexConfig{ 221 IndexNullState: true, 222 IndexTimestamps: true, 223 IndexPropertyLength: true, 224 }, 
225 Properties: []*models.Property{ 226 { 227 Name: "name", 228 DataType: schema.DataTypeText.PropString(), 229 Tokenization: models.PropertyTokenizationField, 230 }, 231 }, 232 } 233 234 refClass := &models.Class{ 235 Class: "RefClass", 236 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 237 InvertedIndexConfig: &models.InvertedIndexConfig{ 238 IndexTimestamps: true, 239 IndexPropertyLength: true, 240 }, 241 Properties: []*models.Property{ 242 { 243 Name: "name", 244 DataType: schema.DataTypeText.PropString(), 245 Tokenization: models.PropertyTokenizationField, 246 }, 247 { 248 Name: "toTest", 249 DataType: []string{"TestClass"}, 250 }, 251 }, 252 } 253 254 migrator := NewMigrator(repo, repo.logger) 255 err := migrator.AddClass(context.Background(), class, schemaGetter.shardState) 256 require.Nil(t, err) 257 err = migrator.AddClass(context.Background(), refClass, schemaGetter.shardState) 258 require.Nil(t, err) 259 schemaGetter.schema.Objects.Classes = append(schemaGetter.schema.Objects.Classes, class, refClass) 260 }) 261 262 t.Run("insert test objects", func(t *testing.T) { 263 vec := []float32{1, 2, 3} 264 for _, obj := range []*models.Object{ 265 { 266 ID: testID1, 267 Class: "TestClass", 268 Properties: map[string]interface{}{ 269 "name": "object1", 270 }, 271 }, 272 { 273 ID: testID2, 274 Class: "TestClass", 275 Properties: map[string]interface{}{ 276 "name": nil, 277 }, 278 }, 279 { 280 ID: refID1, 281 Class: "RefClass", 282 Properties: map[string]interface{}{ 283 "name": "ref1", 284 "toTest": models.MultipleRef{ 285 &models.SingleRef{ 286 Beacon: strfmt.URI(fmt.Sprintf("weaviate://localhost/TestClass/%s", testID1)), 287 }, 288 }, 289 }, 290 }, 291 { 292 ID: refID2, 293 Class: "RefClass", 294 Properties: map[string]interface{}{ 295 "name": "ref2", 296 "toTest": models.MultipleRef{ 297 &models.SingleRef{ 298 Beacon: strfmt.URI(fmt.Sprintf("weaviate://localhost/TestClass/%s", testID2)), 299 }, 300 }, 301 }, 302 }, 303 } { 304 err := 
repo.PutObject(context.Background(), obj, vec, nil, nil) 305 require.Nil(t, err) 306 } 307 }) 308 309 t.Run("check buckets exist", func(t *testing.T) { 310 index := repo.indices["testclass"] 311 n := 0 312 index.ForEachShard(func(_ string, shard ShardLike) error { 313 bucketNull := shard.Store().Bucket(helpers.BucketFromPropNameNullLSM("name")) 314 require.NotNil(t, bucketNull) 315 n++ 316 return nil 317 }) 318 require.Equal(t, 1, n) 319 }) 320 321 type testCase struct { 322 name string 323 filter *filters.LocalFilter 324 expectedIds []strfmt.UUID 325 } 326 327 t.Run("get object with null filters", func(t *testing.T) { 328 testCases := []testCase{ 329 { 330 name: "is null", 331 filter: &filters.LocalFilter{ 332 Root: &filters.Clause{ 333 Operator: filters.OperatorIsNull, 334 On: &filters.Path{ 335 Class: "TestClass", 336 Property: "name", 337 }, 338 Value: &filters.Value{ 339 Value: false, 340 Type: schema.DataTypeBoolean, 341 }, 342 }, 343 }, 344 expectedIds: []strfmt.UUID{testID1}, 345 }, 346 { 347 name: "is not null", 348 filter: &filters.LocalFilter{ 349 Root: &filters.Clause{ 350 Operator: filters.OperatorIsNull, 351 On: &filters.Path{ 352 Class: "TestClass", 353 Property: "name", 354 }, 355 Value: &filters.Value{ 356 Value: true, 357 Type: schema.DataTypeBoolean, 358 }, 359 }, 360 }, 361 expectedIds: []strfmt.UUID{testID2}, 362 }, 363 } 364 365 for _, tc := range testCases { 366 t.Run(tc.name, func(t *testing.T) { 367 res, err := repo.Search(context.Background(), dto.GetParams{ 368 ClassName: "TestClass", 369 Pagination: &filters.Pagination{Limit: 10}, 370 Filters: tc.filter, 371 }) 372 require.Nil(t, err) 373 require.Len(t, res, len(tc.expectedIds)) 374 375 ids := make([]strfmt.UUID, len(res)) 376 for i := range res { 377 ids[i] = res[i].ID 378 } 379 assert.ElementsMatch(t, ids, tc.expectedIds) 380 }) 381 } 382 }) 383 384 t.Run("get referencing object with null filters", func(t *testing.T) { 385 testCases := []testCase{ 386 { 387 name: "is null", 388 filter: 
&filters.LocalFilter{ 389 Root: &filters.Clause{ 390 Operator: filters.OperatorIsNull, 391 On: &filters.Path{ 392 Class: "RefClass", 393 Property: "toTest", 394 Child: &filters.Path{ 395 Class: "TestClass", 396 Property: "name", 397 }, 398 }, 399 Value: &filters.Value{ 400 Value: false, 401 Type: schema.DataTypeBoolean, 402 }, 403 }, 404 }, 405 expectedIds: []strfmt.UUID{refID1}, 406 }, 407 { 408 name: "is not null", 409 filter: &filters.LocalFilter{ 410 Root: &filters.Clause{ 411 Operator: filters.OperatorIsNull, 412 On: &filters.Path{ 413 Class: "RefClass", 414 Property: "toTest", 415 Child: &filters.Path{ 416 Class: "TestClass", 417 Property: "name", 418 }, 419 }, 420 Value: &filters.Value{ 421 Value: true, 422 Type: schema.DataTypeBoolean, 423 }, 424 }, 425 }, 426 expectedIds: []strfmt.UUID{refID2}, 427 }, 428 } 429 430 for _, tc := range testCases { 431 t.Run(tc.name, func(t *testing.T) { 432 res, err := repo.Search(context.Background(), dto.GetParams{ 433 ClassName: "RefClass", 434 Pagination: &filters.Pagination{Limit: 10}, 435 Filters: tc.filter, 436 }) 437 require.Nil(t, err) 438 require.Len(t, res, len(tc.expectedIds)) 439 440 ids := make([]strfmt.UUID, len(res)) 441 for i := range res { 442 ids[i] = res[i].ID 443 } 444 assert.ElementsMatch(t, ids, tc.expectedIds) 445 }) 446 } 447 }) 448 } 449 450 func TestIndexPropLength_GetClass(t *testing.T) { 451 dirName := t.TempDir() 452 453 testID1 := strfmt.UUID("a0b55b05-bc5b-4cc9-b646-1452d1390a62") 454 testID2 := strfmt.UUID("65be32cc-bb74-49c7-833e-afb14f957eae") 455 refID1 := strfmt.UUID("f2e42a9f-e0b5-46bd-8a9c-e70b6330622c") 456 refID2 := strfmt.UUID("92d5920c-1c20-49da-9cdc-b765813e175b") 457 458 var repo *DB 459 var schemaGetter *fakeSchemaGetter 460 461 t.Run("init repo", func(t *testing.T) { 462 schemaGetter = &fakeSchemaGetter{ 463 shardState: singleShardState(), 464 schema: schema.Schema{ 465 Objects: &models.Schema{}, 466 }, 467 } 468 var err error 469 repo, err = New(logrus.New(), Config{ 470 
MemtablesFlushDirtyAfter: 60, 471 RootPath: dirName, 472 QueryMaximumResults: 10000, 473 MaxImportGoroutinesFactor: 1, 474 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 475 require.Nil(t, err) 476 repo.SetSchemaGetter(schemaGetter) 477 require.Nil(t, repo.WaitForStartup(testCtx())) 478 }) 479 480 defer repo.Shutdown(testCtx()) 481 482 t.Run("add classes", func(t *testing.T) { 483 class := &models.Class{ 484 Class: "TestClass", 485 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 486 InvertedIndexConfig: &models.InvertedIndexConfig{ 487 IndexPropertyLength: true, 488 IndexTimestamps: true, 489 }, 490 Properties: []*models.Property{ 491 { 492 Name: "name", 493 DataType: schema.DataTypeText.PropString(), 494 Tokenization: models.PropertyTokenizationField, 495 }, 496 { 497 Name: "int_array", 498 DataType: schema.DataTypeIntArray.PropString(), 499 }, 500 }, 501 } 502 503 refClass := &models.Class{ 504 Class: "RefClass", 505 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 506 InvertedIndexConfig: &models.InvertedIndexConfig{ 507 IndexTimestamps: true, 508 }, 509 Properties: []*models.Property{ 510 { 511 Name: "name", 512 DataType: schema.DataTypeText.PropString(), 513 Tokenization: models.PropertyTokenizationField, 514 }, 515 { 516 Name: "toTest", 517 DataType: []string{"TestClass"}, 518 }, 519 }, 520 } 521 522 migrator := NewMigrator(repo, repo.logger) 523 err := migrator.AddClass(context.Background(), class, schemaGetter.shardState) 524 require.Nil(t, err) 525 err = migrator.AddClass(context.Background(), refClass, schemaGetter.shardState) 526 require.Nil(t, err) 527 schemaGetter.schema.Objects.Classes = append(schemaGetter.schema.Objects.Classes, class, refClass) 528 }) 529 530 t.Run("insert test objects", func(t *testing.T) { 531 vec := []float32{1, 2, 3} 532 for _, obj := range []*models.Object{ 533 { 534 ID: testID1, 535 Class: "TestClass", 536 Properties: map[string]interface{}{ 537 "name": "short", 538 
"int_array": []float64{}, 539 }, 540 }, 541 { 542 ID: testID2, 543 Class: "TestClass", 544 Properties: map[string]interface{}{ 545 "name": "muchLongerName", 546 "int_array": []float64{1, 2, 3}, 547 }, 548 }, 549 { 550 ID: refID1, 551 Class: "RefClass", 552 Properties: map[string]interface{}{ 553 "name": "ref1", 554 "toTest": models.MultipleRef{ 555 &models.SingleRef{ 556 Beacon: strfmt.URI(fmt.Sprintf("weaviate://localhost/TestClass/%s", testID1)), 557 }, 558 }, 559 }, 560 }, 561 { 562 ID: refID2, 563 Class: "RefClass", 564 Properties: map[string]interface{}{ 565 "name": "ref2", 566 "toTest": models.MultipleRef{ 567 &models.SingleRef{ 568 Beacon: strfmt.URI(fmt.Sprintf("weaviate://localhost/TestClass/%s", testID2)), 569 }, 570 }, 571 }, 572 }, 573 } { 574 err := repo.PutObject(context.Background(), obj, vec, nil, nil) 575 require.Nil(t, err) 576 } 577 }) 578 579 t.Run("check buckets exist", func(t *testing.T) { 580 index := repo.indices["testclass"] 581 n := 0 582 index.ForEachShard(func(_ string, shard ShardLike) error { 583 bucketPropLengthName := shard.Store().Bucket(helpers.BucketFromPropNameLengthLSM("name")) 584 require.NotNil(t, bucketPropLengthName) 585 bucketPropLengthIntArray := shard.Store().Bucket(helpers.BucketFromPropNameLengthLSM("int_array")) 586 require.NotNil(t, bucketPropLengthIntArray) 587 n++ 588 return nil 589 }) 590 require.Equal(t, 1, n) 591 }) 592 593 type testCase struct { 594 name string 595 filter *filters.LocalFilter 596 expectedIds []strfmt.UUID 597 } 598 599 t.Run("get object with prop length filters", func(t *testing.T) { 600 testCases := []testCase{ 601 { 602 name: "name length = 5", 603 filter: &filters.LocalFilter{ 604 Root: &filters.Clause{ 605 Operator: filters.OperatorEqual, 606 On: &filters.Path{ 607 Class: "TestClass", 608 Property: "len(name)", 609 }, 610 Value: &filters.Value{ 611 Value: 5, 612 Type: schema.DataTypeInt, 613 }, 614 }, 615 }, 616 expectedIds: []strfmt.UUID{testID1}, 617 }, 618 { 619 name: "name length >= 6", 
620 filter: &filters.LocalFilter{ 621 Root: &filters.Clause{ 622 Operator: filters.OperatorGreaterThanEqual, 623 On: &filters.Path{ 624 Class: "TestClass", 625 Property: "len(name)", 626 }, 627 Value: &filters.Value{ 628 Value: 6, 629 Type: schema.DataTypeInt, 630 }, 631 }, 632 }, 633 expectedIds: []strfmt.UUID{testID2}, 634 }, 635 { 636 name: "array length = 0", 637 filter: &filters.LocalFilter{ 638 Root: &filters.Clause{ 639 Operator: filters.OperatorEqual, 640 On: &filters.Path{ 641 Class: "TestClass", 642 Property: "len(int_array)", 643 }, 644 Value: &filters.Value{ 645 Value: 0, 646 Type: schema.DataTypeInt, 647 }, 648 }, 649 }, 650 expectedIds: []strfmt.UUID{testID1}, 651 }, 652 { 653 name: "array length < 4", 654 filter: &filters.LocalFilter{ 655 Root: &filters.Clause{ 656 Operator: filters.OperatorLessThan, 657 On: &filters.Path{ 658 Class: "TestClass", 659 Property: "len(int_array)", 660 }, 661 Value: &filters.Value{ 662 Value: 4, 663 Type: schema.DataTypeInt, 664 }, 665 }, 666 }, 667 expectedIds: []strfmt.UUID{testID1, testID2}, 668 }, 669 } 670 671 for _, tc := range testCases { 672 t.Run(tc.name, func(t *testing.T) { 673 res, err := repo.Search(context.Background(), dto.GetParams{ 674 ClassName: "TestClass", 675 Pagination: &filters.Pagination{Limit: 10}, 676 Filters: tc.filter, 677 }) 678 require.Nil(t, err) 679 require.Len(t, res, len(tc.expectedIds)) 680 681 ids := make([]strfmt.UUID, len(res)) 682 for i := range res { 683 ids[i] = res[i].ID 684 } 685 assert.ElementsMatch(t, ids, tc.expectedIds) 686 }) 687 } 688 }) 689 690 t.Run("get referencing object with prop length filters", func(t *testing.T) { 691 testCases := []testCase{ 692 { 693 name: "name length = 5", 694 filter: &filters.LocalFilter{ 695 Root: &filters.Clause{ 696 Operator: filters.OperatorEqual, 697 On: &filters.Path{ 698 Class: "RefClass", 699 Property: "toTest", 700 Child: &filters.Path{ 701 Class: "TestClass", 702 Property: "len(name)", 703 }, 704 }, 705 Value: &filters.Value{ 706 
Value: 5, 707 Type: schema.DataTypeInt, 708 }, 709 }, 710 }, 711 expectedIds: []strfmt.UUID{refID1}, 712 }, 713 { 714 name: "name length >= 6", 715 filter: &filters.LocalFilter{ 716 Root: &filters.Clause{ 717 Operator: filters.OperatorGreaterThanEqual, 718 On: &filters.Path{ 719 Class: "RefClass", 720 Property: "toTest", 721 Child: &filters.Path{ 722 Class: "TestClass", 723 Property: "len(name)", 724 }, 725 }, 726 Value: &filters.Value{ 727 Value: 6, 728 Type: schema.DataTypeInt, 729 }, 730 }, 731 }, 732 expectedIds: []strfmt.UUID{refID2}, 733 }, 734 { 735 name: "array length = 0", 736 filter: &filters.LocalFilter{ 737 Root: &filters.Clause{ 738 Operator: filters.OperatorEqual, 739 On: &filters.Path{ 740 Class: "RefClass", 741 Property: "toTest", 742 Child: &filters.Path{ 743 Class: "TestClass", 744 Property: "len(int_array)", 745 }, 746 }, 747 Value: &filters.Value{ 748 Value: 0, 749 Type: schema.DataTypeInt, 750 }, 751 }, 752 }, 753 expectedIds: []strfmt.UUID{refID1}, 754 }, 755 { 756 name: "array length < 4", 757 filter: &filters.LocalFilter{ 758 Root: &filters.Clause{ 759 Operator: filters.OperatorLessThan, 760 On: &filters.Path{ 761 Class: "RefClass", 762 Property: "toTest", 763 Child: &filters.Path{ 764 Class: "TestClass", 765 Property: "len(int_array)", 766 }, 767 }, 768 Value: &filters.Value{ 769 Value: 4, 770 Type: schema.DataTypeInt, 771 }, 772 }, 773 }, 774 expectedIds: []strfmt.UUID{refID1, refID2}, 775 }, 776 } 777 778 for _, tc := range testCases { 779 t.Run(tc.name, func(t *testing.T) { 780 res, err := repo.Search(context.Background(), dto.GetParams{ 781 ClassName: "RefClass", 782 Pagination: &filters.Pagination{Limit: 10}, 783 Filters: tc.filter, 784 }) 785 require.Nil(t, err) 786 require.Len(t, res, len(tc.expectedIds)) 787 788 ids := make([]strfmt.UUID, len(res)) 789 for i := range res { 790 ids[i] = res[i].ID 791 } 792 assert.ElementsMatch(t, ids, tc.expectedIds) 793 }) 794 } 795 }) 796 } 797 798 func TestIndexByTimestamps_GetClass(t *testing.T) 
{ 799 dirName := t.TempDir() 800 801 time1 := time.Now() 802 time2 := time1.Add(-time.Hour) 803 timestamp1 := time1.UnixMilli() 804 timestamp2 := time2.UnixMilli() 805 806 testID1 := strfmt.UUID("a0b55b05-bc5b-4cc9-b646-1452d1390a62") 807 testID2 := strfmt.UUID("65be32cc-bb74-49c7-833e-afb14f957eae") 808 refID1 := strfmt.UUID("f2e42a9f-e0b5-46bd-8a9c-e70b6330622c") 809 refID2 := strfmt.UUID("92d5920c-1c20-49da-9cdc-b765813e175b") 810 811 var repo *DB 812 var schemaGetter *fakeSchemaGetter 813 814 t.Run("init repo", func(t *testing.T) { 815 schemaGetter = &fakeSchemaGetter{ 816 shardState: singleShardState(), 817 schema: schema.Schema{ 818 Objects: &models.Schema{}, 819 }, 820 } 821 var err error 822 repo, err = New(logrus.New(), Config{ 823 MemtablesFlushDirtyAfter: 60, 824 RootPath: dirName, 825 QueryMaximumResults: 10000, 826 MaxImportGoroutinesFactor: 1, 827 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 828 require.Nil(t, err) 829 repo.SetSchemaGetter(schemaGetter) 830 require.Nil(t, repo.WaitForStartup(testCtx())) 831 }) 832 833 defer repo.Shutdown(testCtx()) 834 835 t.Run("add classes", func(t *testing.T) { 836 class := &models.Class{ 837 Class: "TestClass", 838 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 839 InvertedIndexConfig: &models.InvertedIndexConfig{ 840 IndexTimestamps: true, 841 IndexPropertyLength: true, 842 }, 843 Properties: []*models.Property{ 844 { 845 Name: "name", 846 DataType: schema.DataTypeText.PropString(), 847 Tokenization: models.PropertyTokenizationField, 848 }, 849 }, 850 } 851 852 refClass := &models.Class{ 853 Class: "RefClass", 854 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 855 InvertedIndexConfig: &models.InvertedIndexConfig{ 856 IndexTimestamps: true, 857 IndexPropertyLength: true, 858 }, 859 Properties: []*models.Property{ 860 { 861 Name: "name", 862 DataType: schema.DataTypeText.PropString(), 863 Tokenization: models.PropertyTokenizationField, 864 }, 865 { 866 
Name: "toTest", 867 DataType: []string{"TestClass"}, 868 }, 869 }, 870 } 871 872 migrator := NewMigrator(repo, repo.logger) 873 err := migrator.AddClass(context.Background(), class, schemaGetter.shardState) 874 require.Nil(t, err) 875 err = migrator.AddClass(context.Background(), refClass, schemaGetter.shardState) 876 require.Nil(t, err) 877 schemaGetter.schema.Objects.Classes = append(schemaGetter.schema.Objects.Classes, class, refClass) 878 }) 879 880 t.Run("insert test objects", func(t *testing.T) { 881 vec := []float32{1, 2, 3} 882 for _, obj := range []*models.Object{ 883 { 884 ID: testID1, 885 Class: "TestClass", 886 CreationTimeUnix: timestamp1, 887 LastUpdateTimeUnix: timestamp1, 888 Properties: map[string]interface{}{ 889 "name": "object1", 890 }, 891 }, 892 { 893 ID: testID2, 894 Class: "TestClass", 895 CreationTimeUnix: timestamp2, 896 LastUpdateTimeUnix: timestamp2, 897 Properties: map[string]interface{}{ 898 "name": "object2", 899 }, 900 }, 901 { 902 ID: refID1, 903 Class: "RefClass", 904 Properties: map[string]interface{}{ 905 "name": "ref1", 906 "toTest": models.MultipleRef{ 907 &models.SingleRef{ 908 Beacon: strfmt.URI(fmt.Sprintf("weaviate://localhost/TestClass/%s", testID1)), 909 }, 910 }, 911 }, 912 }, 913 { 914 ID: refID2, 915 Class: "RefClass", 916 Properties: map[string]interface{}{ 917 "name": "ref2", 918 "toTest": models.MultipleRef{ 919 &models.SingleRef{ 920 Beacon: strfmt.URI(fmt.Sprintf("weaviate://localhost/TestClass/%s", testID2)), 921 }, 922 }, 923 }, 924 }, 925 } { 926 err := repo.PutObject(context.Background(), obj, vec, nil, nil) 927 require.Nil(t, err) 928 } 929 }) 930 931 t.Run("check buckets exist", func(t *testing.T) { 932 index := repo.indices["testclass"] 933 n := 0 934 index.ForEachShard(func(_ string, shard ShardLike) error { 935 bucketCreated := shard.Store().Bucket("property_" + filters.InternalPropCreationTimeUnix) 936 require.NotNil(t, bucketCreated) 937 bucketUpdated := shard.Store().Bucket("property_" + 
filters.InternalPropLastUpdateTimeUnix) 938 require.NotNil(t, bucketUpdated) 939 n++ 940 return nil 941 }) 942 require.Equal(t, 1, n) 943 }) 944 945 type testCase struct { 946 name string 947 filter *filters.LocalFilter 948 expectedIds []strfmt.UUID 949 } 950 951 t.Run("get object with timestamp filters", func(t *testing.T) { 952 testCases := []testCase{ 953 { 954 name: "by creation timestamp 1", 955 filter: &filters.LocalFilter{ 956 Root: &filters.Clause{ 957 Operator: filters.OperatorEqual, 958 On: &filters.Path{ 959 Class: "TestClass", 960 Property: "_creationTimeUnix", 961 }, 962 Value: &filters.Value{ 963 Value: fmt.Sprint(timestamp1), 964 Type: schema.DataTypeText, 965 }, 966 }, 967 }, 968 expectedIds: []strfmt.UUID{testID1}, 969 }, 970 { 971 name: "by creation timestamp 2", 972 filter: &filters.LocalFilter{ 973 Root: &filters.Clause{ 974 Operator: filters.OperatorEqual, 975 On: &filters.Path{ 976 Class: "TestClass", 977 Property: "_creationTimeUnix", 978 }, 979 Value: &filters.Value{ 980 Value: fmt.Sprint(timestamp2), 981 Type: schema.DataTypeText, 982 }, 983 }, 984 }, 985 expectedIds: []strfmt.UUID{testID2}, 986 }, 987 { 988 name: "by creation date 1", 989 filter: &filters.LocalFilter{ 990 Root: &filters.Clause{ 991 // since RFC3339 is limited to seconds, 992 // >= operator is used to match object with timestamp containing milliseconds 993 Operator: filters.OperatorGreaterThanEqual, 994 On: &filters.Path{ 995 Class: "TestClass", 996 Property: "_creationTimeUnix", 997 }, 998 Value: &filters.Value{ 999 Value: time1.Format(time.RFC3339), 1000 Type: schema.DataTypeDate, 1001 }, 1002 }, 1003 }, 1004 expectedIds: []strfmt.UUID{testID1}, 1005 }, 1006 { 1007 name: "by creation date 2", 1008 filter: &filters.LocalFilter{ 1009 Root: &filters.Clause{ 1010 // since RFC3339 is limited to seconds, 1011 // >= operator is used to match object with timestamp containing milliseconds 1012 Operator: filters.OperatorGreaterThanEqual, 1013 On: &filters.Path{ 1014 Class: 
"TestClass", 1015 Property: "_creationTimeUnix", 1016 }, 1017 Value: &filters.Value{ 1018 Value: time2.Format(time.RFC3339), 1019 Type: schema.DataTypeDate, 1020 }, 1021 }, 1022 }, 1023 expectedIds: []strfmt.UUID{testID1, testID2}, 1024 }, 1025 1026 { 1027 name: "by updated timestamp 1", 1028 filter: &filters.LocalFilter{ 1029 Root: &filters.Clause{ 1030 Operator: filters.OperatorEqual, 1031 On: &filters.Path{ 1032 Class: "TestClass", 1033 Property: "_lastUpdateTimeUnix", 1034 }, 1035 Value: &filters.Value{ 1036 Value: fmt.Sprint(timestamp1), 1037 Type: schema.DataTypeText, 1038 }, 1039 }, 1040 }, 1041 expectedIds: []strfmt.UUID{testID1}, 1042 }, 1043 { 1044 name: "by updated timestamp 2", 1045 filter: &filters.LocalFilter{ 1046 Root: &filters.Clause{ 1047 Operator: filters.OperatorEqual, 1048 On: &filters.Path{ 1049 Class: "TestClass", 1050 Property: "_lastUpdateTimeUnix", 1051 }, 1052 Value: &filters.Value{ 1053 Value: fmt.Sprint(timestamp2), 1054 Type: schema.DataTypeText, 1055 }, 1056 }, 1057 }, 1058 expectedIds: []strfmt.UUID{testID2}, 1059 }, 1060 { 1061 name: "by updated date 1", 1062 filter: &filters.LocalFilter{ 1063 Root: &filters.Clause{ 1064 // since RFC3339 is limited to seconds, 1065 // >= operator is used to match object with timestamp containing milliseconds 1066 Operator: filters.OperatorGreaterThanEqual, 1067 On: &filters.Path{ 1068 Class: "TestClass", 1069 Property: "_lastUpdateTimeUnix", 1070 }, 1071 Value: &filters.Value{ 1072 Value: time1.Format(time.RFC3339), 1073 Type: schema.DataTypeDate, 1074 }, 1075 }, 1076 }, 1077 expectedIds: []strfmt.UUID{testID1}, 1078 }, 1079 { 1080 name: "by updated date 2", 1081 filter: &filters.LocalFilter{ 1082 Root: &filters.Clause{ 1083 // since RFC3339 is limited to seconds, 1084 // >= operator is used to match object with timestamp containing milliseconds 1085 Operator: filters.OperatorGreaterThanEqual, 1086 On: &filters.Path{ 1087 Class: "TestClass", 1088 Property: "_lastUpdateTimeUnix", 1089 }, 1090 Value: 
&filters.Value{ 1091 Value: time2.Format(time.RFC3339), 1092 Type: schema.DataTypeDate, 1093 }, 1094 }, 1095 }, 1096 expectedIds: []strfmt.UUID{testID1, testID2}, 1097 }, 1098 } 1099 1100 for _, tc := range testCases { 1101 t.Run(tc.name, func(t *testing.T) { 1102 res, err := repo.Search(context.Background(), dto.GetParams{ 1103 ClassName: "TestClass", 1104 Pagination: &filters.Pagination{Limit: 10}, 1105 Filters: tc.filter, 1106 }) 1107 require.Nil(t, err) 1108 require.Len(t, res, len(tc.expectedIds)) 1109 1110 ids := make([]strfmt.UUID, len(res)) 1111 for i := range res { 1112 ids[i] = res[i].ID 1113 } 1114 assert.ElementsMatch(t, ids, tc.expectedIds) 1115 }) 1116 } 1117 }) 1118 1119 t.Run("get referencing object with timestamp filters", func(t *testing.T) { 1120 testCases := []testCase{ 1121 { 1122 name: "by creation timestamp 1", 1123 filter: &filters.LocalFilter{ 1124 Root: &filters.Clause{ 1125 Operator: filters.OperatorEqual, 1126 On: &filters.Path{ 1127 Class: "RefClass", 1128 Property: "toTest", 1129 Child: &filters.Path{ 1130 Class: "TestClass", 1131 Property: "_creationTimeUnix", 1132 }, 1133 }, 1134 Value: &filters.Value{ 1135 Value: fmt.Sprint(timestamp1), 1136 Type: schema.DataTypeText, 1137 }, 1138 }, 1139 }, 1140 expectedIds: []strfmt.UUID{refID1}, 1141 }, 1142 { 1143 name: "by creation timestamp 2", 1144 filter: &filters.LocalFilter{ 1145 Root: &filters.Clause{ 1146 Operator: filters.OperatorEqual, 1147 On: &filters.Path{ 1148 Class: "RefClass", 1149 Property: "toTest", 1150 Child: &filters.Path{ 1151 Class: "TestClass", 1152 Property: "_creationTimeUnix", 1153 }, 1154 }, 1155 Value: &filters.Value{ 1156 Value: fmt.Sprint(timestamp2), 1157 Type: schema.DataTypeText, 1158 }, 1159 }, 1160 }, 1161 expectedIds: []strfmt.UUID{refID2}, 1162 }, 1163 { 1164 name: "by creation date 1", 1165 filter: &filters.LocalFilter{ 1166 Root: &filters.Clause{ 1167 // since RFC3339 is limited to seconds, 1168 // >= operator is used to match object with timestamp 
containing milliseconds 1169 Operator: filters.OperatorGreaterThanEqual, 1170 On: &filters.Path{ 1171 Class: "RefClass", 1172 Property: "toTest", 1173 Child: &filters.Path{ 1174 Class: "TestClass", 1175 Property: "_creationTimeUnix", 1176 }, 1177 }, 1178 Value: &filters.Value{ 1179 Value: time1.Format(time.RFC3339), 1180 Type: schema.DataTypeDate, 1181 }, 1182 }, 1183 }, 1184 expectedIds: []strfmt.UUID{refID1}, 1185 }, 1186 { 1187 name: "by creation date 2", 1188 filter: &filters.LocalFilter{ 1189 Root: &filters.Clause{ 1190 // since RFC3339 is limited to seconds, 1191 // >= operator is used to match object with timestamp containing milliseconds 1192 Operator: filters.OperatorGreaterThanEqual, 1193 On: &filters.Path{ 1194 Class: "RefClass", 1195 Property: "toTest", 1196 Child: &filters.Path{ 1197 Class: "TestClass", 1198 Property: "_creationTimeUnix", 1199 }, 1200 }, 1201 Value: &filters.Value{ 1202 Value: time2.Format(time.RFC3339), 1203 Type: schema.DataTypeDate, 1204 }, 1205 }, 1206 }, 1207 expectedIds: []strfmt.UUID{refID1, refID2}, 1208 }, 1209 1210 { 1211 name: "by updated timestamp 1", 1212 filter: &filters.LocalFilter{ 1213 Root: &filters.Clause{ 1214 Operator: filters.OperatorEqual, 1215 On: &filters.Path{ 1216 Class: "RefClass", 1217 Property: "toTest", 1218 Child: &filters.Path{ 1219 Class: "TestClass", 1220 Property: "_lastUpdateTimeUnix", 1221 }, 1222 }, 1223 Value: &filters.Value{ 1224 Value: fmt.Sprint(timestamp1), 1225 Type: schema.DataTypeText, 1226 }, 1227 }, 1228 }, 1229 expectedIds: []strfmt.UUID{refID1}, 1230 }, 1231 { 1232 name: "by updated timestamp 2", 1233 filter: &filters.LocalFilter{ 1234 Root: &filters.Clause{ 1235 Operator: filters.OperatorEqual, 1236 On: &filters.Path{ 1237 Class: "RefClass", 1238 Property: "toTest", 1239 Child: &filters.Path{ 1240 Class: "TestClass", 1241 Property: "_lastUpdateTimeUnix", 1242 }, 1243 }, 1244 Value: &filters.Value{ 1245 Value: fmt.Sprint(timestamp2), 1246 Type: schema.DataTypeText, 1247 }, 1248 }, 1249 
}, 1250 expectedIds: []strfmt.UUID{refID2}, 1251 }, 1252 { 1253 name: "by updated date 1", 1254 filter: &filters.LocalFilter{ 1255 Root: &filters.Clause{ 1256 // since RFC3339 is limited to seconds, 1257 // >= operator is used to match object with timestamp containing milliseconds 1258 Operator: filters.OperatorGreaterThanEqual, 1259 On: &filters.Path{ 1260 Class: "RefClass", 1261 Property: "toTest", 1262 Child: &filters.Path{ 1263 Class: "TestClass", 1264 Property: "_lastUpdateTimeUnix", 1265 }, 1266 }, 1267 Value: &filters.Value{ 1268 Value: time1.Format(time.RFC3339), 1269 Type: schema.DataTypeDate, 1270 }, 1271 }, 1272 }, 1273 expectedIds: []strfmt.UUID{refID1}, 1274 }, 1275 { 1276 name: "by updated date 2", 1277 filter: &filters.LocalFilter{ 1278 Root: &filters.Clause{ 1279 // since RFC3339 is limited to seconds, 1280 // >= operator is used to match object with timestamp containing milliseconds 1281 Operator: filters.OperatorGreaterThanEqual, 1282 On: &filters.Path{ 1283 Class: "RefClass", 1284 Property: "toTest", 1285 Child: &filters.Path{ 1286 Class: "TestClass", 1287 Property: "_lastUpdateTimeUnix", 1288 }, 1289 }, 1290 Value: &filters.Value{ 1291 Value: time2.Format(time.RFC3339), 1292 Type: schema.DataTypeDate, 1293 }, 1294 }, 1295 }, 1296 expectedIds: []strfmt.UUID{refID1, refID2}, 1297 }, 1298 } 1299 1300 for _, tc := range testCases { 1301 t.Run(tc.name, func(t *testing.T) { 1302 res, err := repo.Search(context.Background(), dto.GetParams{ 1303 ClassName: "RefClass", 1304 Pagination: &filters.Pagination{Limit: 10}, 1305 Filters: tc.filter, 1306 }) 1307 require.Nil(t, err) 1308 require.Len(t, res, len(tc.expectedIds)) 1309 1310 ids := make([]strfmt.UUID, len(res)) 1311 for i := range res { 1312 ids[i] = res[i].ID 1313 } 1314 assert.ElementsMatch(t, ids, tc.expectedIds) 1315 }) 1316 } 1317 }) 1318 }

// TestFilterPropertyLengthError checks that a search filtering on a property's
// length ("len(<prop>)") returns an error when property-length indexing has
// not been enabled in the class's InvertedIndexConfig.
func TestFilterPropertyLengthError(t *testing.T) {
	ctx := context.Background()

	// NOTE(review): the two false arguments presumably leave the optional
	// inverted indexes (including property length) disabled — confirm against
	// createClassWithEverything.
	class := createClassWithEverything(false, false)

	migrator, repo, schemaGetter := createRepo(t)
	defer repo.Shutdown(ctx)

	require.NoError(t, migrator.AddClass(ctx, class, schemaGetter.shardState))

	// update schema getter so it's in sync with class
	schemaGetter.schema = schema.Schema{
		Objects: &models.Schema{
			Classes: []*models.Class{class},
		},
	}

	// Address the class under test in the filter path instead of borrowing the
	// name of an unrelated fixture, so the expected failure can only come from
	// the length filter itself.
	lengthFilter := &filters.LocalFilter{
		Root: &filters.Clause{
			Operator: filters.OperatorEqual,
			On: &filters.Path{
				Class:    schema.ClassName(class.Class),
				Property: "len(" + schema.PropertyName(class.Properties[0].Name) + ")",
			},
			Value: &filters.Value{
				Value: 1,
				Type:  dtInt,
			},
		},
	}

	_, err := repo.Search(ctx, dto.GetParams{
		SearchVector: []float32{0.1, 0.1, 0.1, 1.1, 0.1},
		ClassName:    class.Class,
		Pagination:   &filters.Pagination{Limit: 5},
		Filters:      lengthFilter,
	})
	// The search must be rejected; returning results here would mean the
	// length filter was silently accepted.
	require.Error(t, err)
}