github.com/weaviate/weaviate@v1.24.6/usecases/objects/batch_add_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package objects 13 14 import ( 15 "context" 16 "fmt" 17 "testing" 18 19 "github.com/go-openapi/strfmt" 20 "github.com/sirupsen/logrus/hooks/test" 21 "github.com/stretchr/testify/assert" 22 "github.com/stretchr/testify/mock" 23 "github.com/stretchr/testify/require" 24 "github.com/weaviate/weaviate/entities/models" 25 "github.com/weaviate/weaviate/entities/schema" 26 "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 27 "github.com/weaviate/weaviate/usecases/config" 28 ) 29 30 func Test_BatchManager_AddObjects_WithNoVectorizerModule(t *testing.T) { 31 var ( 32 vectorRepo *fakeVectorRepo 33 modulesProvider *fakeModulesProvider 34 manager *BatchManager 35 ) 36 37 schema := schema.Schema{ 38 Objects: &models.Schema{ 39 Classes: []*models.Class{ 40 { 41 Vectorizer: config.VectorizerModuleNone, 42 Class: "Foo", 43 VectorIndexConfig: hnsw.UserConfig{}, 44 }, 45 { 46 Vectorizer: config.VectorizerModuleNone, 47 Class: "FooSkipped", 48 VectorIndexConfig: hnsw.UserConfig{ 49 Skip: true, 50 }, 51 }, 52 }, 53 }, 54 } 55 56 resetAutoSchema := func(autoSchema bool) { 57 vectorRepo = &fakeVectorRepo{} 58 config := &config.WeaviateConfig{ 59 Config: config.Config{ 60 AutoSchema: config.AutoSchema{ 61 Enabled: autoSchema, 62 }, 63 TrackVectorDimensions: true, 64 }, 65 } 66 locks := &fakeLocks{} 67 schemaManager := &fakeSchemaManager{ 68 GetSchemaResponse: schema, 69 } 70 logger, _ := test.NewNullLogger() 71 authorizer := &fakeAuthorizer{} 72 modulesProvider = getFakeModulesProvider() 73 manager = NewBatchManager(vectorRepo, modulesProvider, locks, 74 schemaManager, config, logger, authorizer, nil) 75 } 76 77 reset := func() { 78 resetAutoSchema(false) 79 } 80 ctx := context.Background() 81 82 t.Run("without any objects", func(t *testing.T) { 83 reset() 84 expectedErr := NewErrInvalidUserInput("invalid param 'objects': cannot be empty, need at least" + 85 " one object for batching") 86 87 _, err := manager.AddObjects(ctx, nil, []*models.Object{}, []*string{}, nil) 88 89 assert.Equal(t, expectedErr, err) 90 }) 91 92 t.Run("with objects without IDs", func(t *testing.T) { 93 reset() 94 vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() 95 objects := []*models.Object{ 96 { 97 Class: "Foo", 98 Vector: []float32{0.1, 0.1, 0.1111}, 99 }, 100 { 101 Class: "Foo", 102 Vector: []float32{0.2, 0.2, 0.2222}, 103 }, 104 } 105 106 for range objects { 107 modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). 108 Return(nil, nil) 109 } 110 111 _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) 112 repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) 113 114 assert.Nil(t, err) 115 require.Len(t, repoCalledWithObjects, 2) 116 assert.Len(t, repoCalledWithObjects[0].UUID, 36, 117 "a uuid was set for the first object") 118 assert.Len(t, repoCalledWithObjects[1].UUID, 36, 119 "a uuid was set for the second object") 120 assert.Nil(t, repoCalledWithObjects[0].Err) 121 assert.Nil(t, repoCalledWithObjects[1].Err) 122 assert.Equal(t, models.C11yVector{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Object.Vector, 123 "the correct vector was used") 124 assert.Equal(t, models.C11yVector{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Object.Vector, 125 "the correct vector was used") 126 }) 127 128 t.Run("with objects without IDs and nonexistent class and auto schema enabled", func(t *testing.T) { 129 resetAutoSchema(true) 130 vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() 131 objects := []*models.Object{ 132 { 133 Class: "NonExistentFoo", 134 Vector: []float32{0.1, 0.1, 0.1111}, 135 }, 136 { 137 Class: "NonExistentFoo", 138 Vector: []float32{0.2, 0.2, 0.2222}, 139 }, 140 } 141 142 for range objects { 143 modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). 144 Return(nil, nil) 145 } 146 147 _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) 148 repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) 149 150 assert.Nil(t, err) 151 require.Len(t, repoCalledWithObjects, 2) 152 assert.Len(t, repoCalledWithObjects[0].UUID, 36, 153 "a uuid was set for the first object") 154 assert.Len(t, repoCalledWithObjects[1].UUID, 36, 155 "a uuid was set for the second object") 156 assert.Nil(t, repoCalledWithObjects[0].Err) 157 assert.Nil(t, repoCalledWithObjects[1].Err) 158 assert.Equal(t, models.C11yVector{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Object.Vector, 159 "the correct vector was used") 160 assert.Equal(t, models.C11yVector{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Object.Vector, 161 "the correct vector was used") 162 }) 163 164 t.Run("with user-specified IDs", func(t *testing.T) { 165 reset() 166 vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() 167 id1 := strfmt.UUID("2d3942c3-b412-4d80-9dfa-99a646629cd2") 168 id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6") 169 objects := []*models.Object{ 170 { 171 ID: id1, 172 Class: "Foo", 173 Vector: []float32{0.1, 0.1, 0.1111}, 174 }, 175 { 176 ID: id2, 177 Class: "Foo", 178 Vector: []float32{0.2, 0.2, 0.2222}, 179 }, 180 } 181 182 for range objects { 183 modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). 184 Return(nil, nil) 185 } 186 187 _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) 188 repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) 189 190 assert.Nil(t, err) 191 require.Len(t, repoCalledWithObjects, 2) 192 assert.Equal(t, id1, repoCalledWithObjects[0].UUID, "the user-specified uuid was used") 193 assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used") 194 assert.Nil(t, repoCalledWithObjects[0].Err) 195 assert.Nil(t, repoCalledWithObjects[1].Err) 196 assert.Equal(t, models.C11yVector{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Object.Vector, 197 "the correct vector was used") 198 assert.Equal(t, models.C11yVector{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Object.Vector, 199 "the correct vector was used") 200 }) 201 202 t.Run("with an invalid user-specified IDs", func(t *testing.T) { 203 reset() 204 vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() 205 id1 := strfmt.UUID("invalid") 206 id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6") 207 objects := []*models.Object{ 208 { 209 ID: id1, 210 Class: "Foo", 211 Vector: []float32{0.1, 0.1, 0.1111}, 212 }, 213 { 214 ID: id2, 215 Class: "Foo", 216 Vector: []float32{0.2, 0.2, 0.2222}, 217 }, 218 } 219 220 for range objects { 221 modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). 222 Return(nil, nil) 223 } 224 225 _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) 226 repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) 227 228 assert.Nil(t, err) 229 require.Len(t, repoCalledWithObjects, 2) 230 assert.Equal(t, repoCalledWithObjects[0].Err.Error(), fmt.Sprintf("invalid UUID length: %d", len(id1))) 231 assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used") 232 }) 233 234 t.Run("without any vectors", func(t *testing.T) { 235 // prior to v1.10 this was the desired behavior: 236 // note that this should fail on class Foo, but be accepted on class 237 // FooSkipped 238 // 239 // However, since v1.10, it is acceptable to exclude a vector, even if 240 // indexing is not skipped. In this case only the individual element is 241 // skipped. See https://github.com/weaviate/weaviate/issues/1800 242 reset() 243 vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() 244 objects := []*models.Object{ 245 { 246 Class: "Foo", 247 }, 248 { 249 Class: "FooSkipped", 250 }, 251 } 252 253 for range objects { 254 modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). 255 Return(nil, nil) 256 } 257 258 _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) 259 repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) 260 261 assert.Nil(t, err) 262 require.Len(t, repoCalledWithObjects, 2) 263 assert.Nil(t, repoCalledWithObjects[0].Err) 264 assert.Nil(t, repoCalledWithObjects[1].Err) 265 }) 266 } 267 268 func Test_BatchManager_AddObjects_WithExternalVectorizerModule(t *testing.T) { 269 var ( 270 vectorRepo *fakeVectorRepo 271 modulesProvider *fakeModulesProvider 272 manager *BatchManager 273 ) 274 275 schema := schema.Schema{ 276 Objects: &models.Schema{ 277 Classes: []*models.Class{ 278 { 279 Vectorizer: config.VectorizerModuleText2VecContextionary, 280 VectorIndexConfig: hnsw.UserConfig{}, 281 Class: "Foo", 282 }, 283 }, 284 }, 285 } 286 287 reset := func() { 288 vectorRepo = &fakeVectorRepo{} 289 config := &config.WeaviateConfig{} 290 locks := &fakeLocks{} 291 schemaManager := &fakeSchemaManager{ 292 GetSchemaResponse: schema, 293 } 294 logger, _ := test.NewNullLogger() 295 authorizer := &fakeAuthorizer{} 296 modulesProvider = getFakeModulesProvider() 297 manager = NewBatchManager(vectorRepo, modulesProvider, locks, 298 schemaManager, config, logger, authorizer, nil) 299 } 300 301 ctx := context.Background() 302 303 t.Run("without any objects", func(t *testing.T) { 304 reset() 305 expectedErr := NewErrInvalidUserInput("invalid param 'objects': cannot be empty, need at least" + 306 " one object for batching") 307 308 _, err := manager.AddObjects(ctx, nil, []*models.Object{}, []*string{}, nil) 309 310 assert.Equal(t, expectedErr, err) 311 }) 312 313 t.Run("with objects without IDs", func(t *testing.T) { 314 reset() 315 vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() 316 expectedVector := models.C11yVector{0, 1, 2} 317 objects := []*models.Object{ 318 { 319 Class: "Foo", 320 }, 321 { 322 Class: "Foo", 323 }, 324 } 325 326 for range objects { 327 modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). 328 Return(expectedVector, nil) 329 } 330 331 _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) 332 repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) 333 334 assert.Nil(t, err) 335 require.Len(t, repoCalledWithObjects, 2) 336 assert.Len(t, repoCalledWithObjects[0].UUID, 36, "a uuid was set for the first object") 337 assert.Len(t, repoCalledWithObjects[1].UUID, 36, "a uuid was set for the second object") 338 assert.Nil(t, repoCalledWithObjects[0].Err) 339 assert.Nil(t, repoCalledWithObjects[1].Err) 340 assert.Equal(t, expectedVector, repoCalledWithObjects[0].Object.Vector, 341 "the correct vector was used") 342 assert.Equal(t, expectedVector, repoCalledWithObjects[1].Object.Vector, 343 "the correct vector was used") 344 }) 345 346 t.Run("with user-specified IDs", func(t *testing.T) { 347 reset() 348 vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() 349 id1 := strfmt.UUID("2d3942c3-b412-4d80-9dfa-99a646629cd2") 350 id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6") 351 objects := []*models.Object{ 352 { 353 ID: id1, 354 Class: "Foo", 355 }, 356 { 357 ID: id2, 358 Class: "Foo", 359 }, 360 } 361 362 for range objects { 363 modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). 364 Return(nil, nil) 365 } 366 367 _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) 368 repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) 369 370 assert.Nil(t, err) 371 require.Len(t, repoCalledWithObjects, 2) 372 assert.Equal(t, id1, repoCalledWithObjects[0].UUID, "the user-specified uuid was used") 373 assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used") 374 }) 375 376 t.Run("with an invalid user-specified IDs", func(t *testing.T) { 377 reset() 378 vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() 379 id1 := strfmt.UUID("invalid") 380 id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6") 381 objects := []*models.Object{ 382 { 383 ID: id1, 384 Class: "Foo", 385 }, 386 { 387 ID: id2, 388 Class: "Foo", 389 }, 390 } 391 392 for range objects { 393 modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). 394 Return(nil, nil) 395 } 396 397 _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) 398 repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) 399 400 assert.Nil(t, err) 401 require.Len(t, repoCalledWithObjects, 2) 402 assert.Equal(t, repoCalledWithObjects[0].Err.Error(), fmt.Sprintf("invalid UUID length: %d", len(id1))) 403 assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used") 404 }) 405 } 406 407 func Test_BatchManager_AddObjectsEmptyProperties(t *testing.T) { 408 var ( 409 vectorRepo *fakeVectorRepo 410 modulesProvider *fakeModulesProvider 411 manager *BatchManager 412 ) 413 schema := schema.Schema{ 414 Objects: &models.Schema{ 415 Classes: []*models.Class{ 416 { 417 Class: "TestClass", 418 VectorIndexConfig: hnsw.UserConfig{}, 419 420 Properties: []*models.Property{ 421 { 422 Name: "strings", 423 DataType: schema.DataTypeTextArray.PropString(), 424 Tokenization: models.PropertyTokenizationWhitespace, 425 }, 426 }, 427 }, 428 }, 429 }, 430 } 431 reset := func() { 432 vectorRepo = &fakeVectorRepo{} 433 vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() 434 config := &config.WeaviateConfig{} 435 locks := &fakeLocks{} 436 schemaManager := &fakeSchemaManager{ 437 GetSchemaResponse: schema, 438 } 439 logger, _ := test.NewNullLogger() 440 authorizer := &fakeAuthorizer{} 441 modulesProvider = getFakeModulesProvider() 442 manager = NewBatchManager(vectorRepo, modulesProvider, locks, 443 schemaManager, config, logger, authorizer, nil) 444 } 445 reset() 446 objects := []*models.Object{ 447 { 448 ID: strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6"), 449 Class: "TestClass", 450 }, 451 { 452 ID: strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff3"), 453 Class: "TestClass", 454 Properties: map[string]interface{}{ 455 "name": "testName", 456 }, 457 }, 458 } 459 require.Nil(t, objects[0].Properties) 460 require.NotNil(t, objects[1].Properties) 461 462 ctx := context.Background() 463 for range objects { 464 modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). 465 Return(nil, nil) 466 } 467 addedObjects, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) 468 assert.Nil(t, err) 469 require.Len(t, addedObjects, 2) 470 require.NotNil(t, addedObjects[0].Object.Properties) 471 require.NotNil(t, addedObjects[1].Object.Properties) 472 }