github.com/weaviate/weaviate@v1.24.6/usecases/objects/batch_add_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package objects
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"testing"
    18  
    19  	"github.com/go-openapi/strfmt"
    20  	"github.com/sirupsen/logrus/hooks/test"
    21  	"github.com/stretchr/testify/assert"
    22  	"github.com/stretchr/testify/mock"
    23  	"github.com/stretchr/testify/require"
    24  	"github.com/weaviate/weaviate/entities/models"
    25  	"github.com/weaviate/weaviate/entities/schema"
    26  	"github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    27  	"github.com/weaviate/weaviate/usecases/config"
    28  )
    29  
    30  func Test_BatchManager_AddObjects_WithNoVectorizerModule(t *testing.T) {
    31  	var (
    32  		vectorRepo      *fakeVectorRepo
    33  		modulesProvider *fakeModulesProvider
    34  		manager         *BatchManager
    35  	)
    36  
    37  	schema := schema.Schema{
    38  		Objects: &models.Schema{
    39  			Classes: []*models.Class{
    40  				{
    41  					Vectorizer:        config.VectorizerModuleNone,
    42  					Class:             "Foo",
    43  					VectorIndexConfig: hnsw.UserConfig{},
    44  				},
    45  				{
    46  					Vectorizer: config.VectorizerModuleNone,
    47  					Class:      "FooSkipped",
    48  					VectorIndexConfig: hnsw.UserConfig{
    49  						Skip: true,
    50  					},
    51  				},
    52  			},
    53  		},
    54  	}
    55  
    56  	resetAutoSchema := func(autoSchema bool) {
    57  		vectorRepo = &fakeVectorRepo{}
    58  		config := &config.WeaviateConfig{
    59  			Config: config.Config{
    60  				AutoSchema: config.AutoSchema{
    61  					Enabled: autoSchema,
    62  				},
    63  				TrackVectorDimensions: true,
    64  			},
    65  		}
    66  		locks := &fakeLocks{}
    67  		schemaManager := &fakeSchemaManager{
    68  			GetSchemaResponse: schema,
    69  		}
    70  		logger, _ := test.NewNullLogger()
    71  		authorizer := &fakeAuthorizer{}
    72  		modulesProvider = getFakeModulesProvider()
    73  		manager = NewBatchManager(vectorRepo, modulesProvider, locks,
    74  			schemaManager, config, logger, authorizer, nil)
    75  	}
    76  
    77  	reset := func() {
    78  		resetAutoSchema(false)
    79  	}
    80  	ctx := context.Background()
    81  
    82  	t.Run("without any objects", func(t *testing.T) {
    83  		reset()
    84  		expectedErr := NewErrInvalidUserInput("invalid param 'objects': cannot be empty, need at least" +
    85  			" one object for batching")
    86  
    87  		_, err := manager.AddObjects(ctx, nil, []*models.Object{}, []*string{}, nil)
    88  
    89  		assert.Equal(t, expectedErr, err)
    90  	})
    91  
    92  	t.Run("with objects without IDs", func(t *testing.T) {
    93  		reset()
    94  		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
    95  		objects := []*models.Object{
    96  			{
    97  				Class:  "Foo",
    98  				Vector: []float32{0.1, 0.1, 0.1111},
    99  			},
   100  			{
   101  				Class:  "Foo",
   102  				Vector: []float32{0.2, 0.2, 0.2222},
   103  			},
   104  		}
   105  
   106  		for range objects {
   107  			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
   108  				Return(nil, nil)
   109  		}
   110  
   111  		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
   112  		repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects)
   113  
   114  		assert.Nil(t, err)
   115  		require.Len(t, repoCalledWithObjects, 2)
   116  		assert.Len(t, repoCalledWithObjects[0].UUID, 36,
   117  			"a uuid was set for the first object")
   118  		assert.Len(t, repoCalledWithObjects[1].UUID, 36,
   119  			"a uuid was set for the second object")
   120  		assert.Nil(t, repoCalledWithObjects[0].Err)
   121  		assert.Nil(t, repoCalledWithObjects[1].Err)
   122  		assert.Equal(t, models.C11yVector{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Object.Vector,
   123  			"the correct vector was used")
   124  		assert.Equal(t, models.C11yVector{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Object.Vector,
   125  			"the correct vector was used")
   126  	})
   127  
   128  	t.Run("with objects without IDs and nonexistent class and auto schema enabled", func(t *testing.T) {
   129  		resetAutoSchema(true)
   130  		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
   131  		objects := []*models.Object{
   132  			{
   133  				Class:  "NonExistentFoo",
   134  				Vector: []float32{0.1, 0.1, 0.1111},
   135  			},
   136  			{
   137  				Class:  "NonExistentFoo",
   138  				Vector: []float32{0.2, 0.2, 0.2222},
   139  			},
   140  		}
   141  
   142  		for range objects {
   143  			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
   144  				Return(nil, nil)
   145  		}
   146  
   147  		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
   148  		repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects)
   149  
   150  		assert.Nil(t, err)
   151  		require.Len(t, repoCalledWithObjects, 2)
   152  		assert.Len(t, repoCalledWithObjects[0].UUID, 36,
   153  			"a uuid was set for the first object")
   154  		assert.Len(t, repoCalledWithObjects[1].UUID, 36,
   155  			"a uuid was set for the second object")
   156  		assert.Nil(t, repoCalledWithObjects[0].Err)
   157  		assert.Nil(t, repoCalledWithObjects[1].Err)
   158  		assert.Equal(t, models.C11yVector{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Object.Vector,
   159  			"the correct vector was used")
   160  		assert.Equal(t, models.C11yVector{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Object.Vector,
   161  			"the correct vector was used")
   162  	})
   163  
   164  	t.Run("with user-specified IDs", func(t *testing.T) {
   165  		reset()
   166  		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
   167  		id1 := strfmt.UUID("2d3942c3-b412-4d80-9dfa-99a646629cd2")
   168  		id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6")
   169  		objects := []*models.Object{
   170  			{
   171  				ID:     id1,
   172  				Class:  "Foo",
   173  				Vector: []float32{0.1, 0.1, 0.1111},
   174  			},
   175  			{
   176  				ID:     id2,
   177  				Class:  "Foo",
   178  				Vector: []float32{0.2, 0.2, 0.2222},
   179  			},
   180  		}
   181  
   182  		for range objects {
   183  			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
   184  				Return(nil, nil)
   185  		}
   186  
   187  		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
   188  		repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects)
   189  
   190  		assert.Nil(t, err)
   191  		require.Len(t, repoCalledWithObjects, 2)
   192  		assert.Equal(t, id1, repoCalledWithObjects[0].UUID, "the user-specified uuid was used")
   193  		assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used")
   194  		assert.Nil(t, repoCalledWithObjects[0].Err)
   195  		assert.Nil(t, repoCalledWithObjects[1].Err)
   196  		assert.Equal(t, models.C11yVector{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Object.Vector,
   197  			"the correct vector was used")
   198  		assert.Equal(t, models.C11yVector{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Object.Vector,
   199  			"the correct vector was used")
   200  	})
   201  
   202  	t.Run("with an invalid user-specified IDs", func(t *testing.T) {
   203  		reset()
   204  		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
   205  		id1 := strfmt.UUID("invalid")
   206  		id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6")
   207  		objects := []*models.Object{
   208  			{
   209  				ID:     id1,
   210  				Class:  "Foo",
   211  				Vector: []float32{0.1, 0.1, 0.1111},
   212  			},
   213  			{
   214  				ID:     id2,
   215  				Class:  "Foo",
   216  				Vector: []float32{0.2, 0.2, 0.2222},
   217  			},
   218  		}
   219  
   220  		for range objects {
   221  			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
   222  				Return(nil, nil)
   223  		}
   224  
   225  		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
   226  		repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects)
   227  
   228  		assert.Nil(t, err)
   229  		require.Len(t, repoCalledWithObjects, 2)
   230  		assert.Equal(t, repoCalledWithObjects[0].Err.Error(), fmt.Sprintf("invalid UUID length: %d", len(id1)))
   231  		assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used")
   232  	})
   233  
   234  	t.Run("without any vectors", func(t *testing.T) {
   235  		// prior to v1.10 this was the desired behavior:
   236  		// note that this should fail on class Foo, but be accepted on class
   237  		// FooSkipped
   238  		//
   239  		// However, since v1.10, it is acceptable to exclude a vector, even if
   240  		// indexing is not skipped. In this case only the individual element is
   241  		// skipped. See https://github.com/weaviate/weaviate/issues/1800
   242  		reset()
   243  		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
   244  		objects := []*models.Object{
   245  			{
   246  				Class: "Foo",
   247  			},
   248  			{
   249  				Class: "FooSkipped",
   250  			},
   251  		}
   252  
   253  		for range objects {
   254  			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
   255  				Return(nil, nil)
   256  		}
   257  
   258  		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
   259  		repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects)
   260  
   261  		assert.Nil(t, err)
   262  		require.Len(t, repoCalledWithObjects, 2)
   263  		assert.Nil(t, repoCalledWithObjects[0].Err)
   264  		assert.Nil(t, repoCalledWithObjects[1].Err)
   265  	})
   266  }
   267  
   268  func Test_BatchManager_AddObjects_WithExternalVectorizerModule(t *testing.T) {
   269  	var (
   270  		vectorRepo      *fakeVectorRepo
   271  		modulesProvider *fakeModulesProvider
   272  		manager         *BatchManager
   273  	)
   274  
   275  	schema := schema.Schema{
   276  		Objects: &models.Schema{
   277  			Classes: []*models.Class{
   278  				{
   279  					Vectorizer:        config.VectorizerModuleText2VecContextionary,
   280  					VectorIndexConfig: hnsw.UserConfig{},
   281  					Class:             "Foo",
   282  				},
   283  			},
   284  		},
   285  	}
   286  
   287  	reset := func() {
   288  		vectorRepo = &fakeVectorRepo{}
   289  		config := &config.WeaviateConfig{}
   290  		locks := &fakeLocks{}
   291  		schemaManager := &fakeSchemaManager{
   292  			GetSchemaResponse: schema,
   293  		}
   294  		logger, _ := test.NewNullLogger()
   295  		authorizer := &fakeAuthorizer{}
   296  		modulesProvider = getFakeModulesProvider()
   297  		manager = NewBatchManager(vectorRepo, modulesProvider, locks,
   298  			schemaManager, config, logger, authorizer, nil)
   299  	}
   300  
   301  	ctx := context.Background()
   302  
   303  	t.Run("without any objects", func(t *testing.T) {
   304  		reset()
   305  		expectedErr := NewErrInvalidUserInput("invalid param 'objects': cannot be empty, need at least" +
   306  			" one object for batching")
   307  
   308  		_, err := manager.AddObjects(ctx, nil, []*models.Object{}, []*string{}, nil)
   309  
   310  		assert.Equal(t, expectedErr, err)
   311  	})
   312  
   313  	t.Run("with objects without IDs", func(t *testing.T) {
   314  		reset()
   315  		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
   316  		expectedVector := models.C11yVector{0, 1, 2}
   317  		objects := []*models.Object{
   318  			{
   319  				Class: "Foo",
   320  			},
   321  			{
   322  				Class: "Foo",
   323  			},
   324  		}
   325  
   326  		for range objects {
   327  			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
   328  				Return(expectedVector, nil)
   329  		}
   330  
   331  		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
   332  		repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects)
   333  
   334  		assert.Nil(t, err)
   335  		require.Len(t, repoCalledWithObjects, 2)
   336  		assert.Len(t, repoCalledWithObjects[0].UUID, 36, "a uuid was set for the first object")
   337  		assert.Len(t, repoCalledWithObjects[1].UUID, 36, "a uuid was set for the second object")
   338  		assert.Nil(t, repoCalledWithObjects[0].Err)
   339  		assert.Nil(t, repoCalledWithObjects[1].Err)
   340  		assert.Equal(t, expectedVector, repoCalledWithObjects[0].Object.Vector,
   341  			"the correct vector was used")
   342  		assert.Equal(t, expectedVector, repoCalledWithObjects[1].Object.Vector,
   343  			"the correct vector was used")
   344  	})
   345  
   346  	t.Run("with user-specified IDs", func(t *testing.T) {
   347  		reset()
   348  		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
   349  		id1 := strfmt.UUID("2d3942c3-b412-4d80-9dfa-99a646629cd2")
   350  		id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6")
   351  		objects := []*models.Object{
   352  			{
   353  				ID:    id1,
   354  				Class: "Foo",
   355  			},
   356  			{
   357  				ID:    id2,
   358  				Class: "Foo",
   359  			},
   360  		}
   361  
   362  		for range objects {
   363  			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
   364  				Return(nil, nil)
   365  		}
   366  
   367  		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
   368  		repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects)
   369  
   370  		assert.Nil(t, err)
   371  		require.Len(t, repoCalledWithObjects, 2)
   372  		assert.Equal(t, id1, repoCalledWithObjects[0].UUID, "the user-specified uuid was used")
   373  		assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used")
   374  	})
   375  
   376  	t.Run("with an invalid user-specified IDs", func(t *testing.T) {
   377  		reset()
   378  		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
   379  		id1 := strfmt.UUID("invalid")
   380  		id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6")
   381  		objects := []*models.Object{
   382  			{
   383  				ID:    id1,
   384  				Class: "Foo",
   385  			},
   386  			{
   387  				ID:    id2,
   388  				Class: "Foo",
   389  			},
   390  		}
   391  
   392  		for range objects {
   393  			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
   394  				Return(nil, nil)
   395  		}
   396  
   397  		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
   398  		repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects)
   399  
   400  		assert.Nil(t, err)
   401  		require.Len(t, repoCalledWithObjects, 2)
   402  		assert.Equal(t, repoCalledWithObjects[0].Err.Error(), fmt.Sprintf("invalid UUID length: %d", len(id1)))
   403  		assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used")
   404  	})
   405  }
   406  
   407  func Test_BatchManager_AddObjectsEmptyProperties(t *testing.T) {
   408  	var (
   409  		vectorRepo      *fakeVectorRepo
   410  		modulesProvider *fakeModulesProvider
   411  		manager         *BatchManager
   412  	)
   413  	schema := schema.Schema{
   414  		Objects: &models.Schema{
   415  			Classes: []*models.Class{
   416  				{
   417  					Class:             "TestClass",
   418  					VectorIndexConfig: hnsw.UserConfig{},
   419  
   420  					Properties: []*models.Property{
   421  						{
   422  							Name:         "strings",
   423  							DataType:     schema.DataTypeTextArray.PropString(),
   424  							Tokenization: models.PropertyTokenizationWhitespace,
   425  						},
   426  					},
   427  				},
   428  			},
   429  		},
   430  	}
   431  	reset := func() {
   432  		vectorRepo = &fakeVectorRepo{}
   433  		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
   434  		config := &config.WeaviateConfig{}
   435  		locks := &fakeLocks{}
   436  		schemaManager := &fakeSchemaManager{
   437  			GetSchemaResponse: schema,
   438  		}
   439  		logger, _ := test.NewNullLogger()
   440  		authorizer := &fakeAuthorizer{}
   441  		modulesProvider = getFakeModulesProvider()
   442  		manager = NewBatchManager(vectorRepo, modulesProvider, locks,
   443  			schemaManager, config, logger, authorizer, nil)
   444  	}
   445  	reset()
   446  	objects := []*models.Object{
   447  		{
   448  			ID:    strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6"),
   449  			Class: "TestClass",
   450  		},
   451  		{
   452  			ID:    strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff3"),
   453  			Class: "TestClass",
   454  			Properties: map[string]interface{}{
   455  				"name": "testName",
   456  			},
   457  		},
   458  	}
   459  	require.Nil(t, objects[0].Properties)
   460  	require.NotNil(t, objects[1].Properties)
   461  
   462  	ctx := context.Background()
   463  	for range objects {
   464  		modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
   465  			Return(nil, nil)
   466  	}
   467  	addedObjects, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
   468  	assert.Nil(t, err)
   469  	require.Len(t, addedObjects, 2)
   470  	require.NotNil(t, addedObjects[0].Object.Properties)
   471  	require.NotNil(t, addedObjects[1].Object.Properties)
   472  }