github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/crud_update_integration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTest
    13  // +build integrationTest
    14  
    15  package db
    16  
    17  import (
    18  	"context"
    19  	"testing"
    20  
    21  	"github.com/sirupsen/logrus"
    22  	"github.com/stretchr/testify/assert"
    23  	"github.com/stretchr/testify/require"
    24  	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
    25  	"github.com/weaviate/weaviate/entities/additional"
    26  	"github.com/weaviate/weaviate/entities/dto"
    27  	"github.com/weaviate/weaviate/entities/filters"
    28  	"github.com/weaviate/weaviate/entities/models"
    29  	"github.com/weaviate/weaviate/entities/schema"
    30  	libschema "github.com/weaviate/weaviate/entities/schema"
    31  	"github.com/weaviate/weaviate/entities/search"
    32  	enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    33  )
    34  
    35  // Updates are non trivial, because vector indices are built under the
    36  // assumption that items are immutable (this is true for HNSW, the assumption
    37  // is that this is generally true in the majority of cases). Therefore an
    38  // update is essentially a delete and a new import with a new doc ID. This
    39  // needs to be tested extensively because there's a lot of room for error
    40  // regarding the clean up of Doc ID pointers in the inverted indices, etc.
    41  func TestUpdateJourney(t *testing.T) {
    42  	dirName := t.TempDir()
    43  
    44  	logger := logrus.New()
    45  	schemaGetter := &fakeSchemaGetter{
    46  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
    47  		shardState: singleShardState(),
    48  	}
    49  	repo, err := New(logger, Config{
    50  		MemtablesFlushDirtyAfter:  60,
    51  		RootPath:                  dirName,
    52  		QueryMaximumResults:       10000,
    53  		MaxImportGoroutinesFactor: 1,
    54  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil)
    55  	require.Nil(t, err)
    56  	repo.SetSchemaGetter(schemaGetter)
    57  	require.Nil(t, repo.WaitForStartup(testCtx()))
    58  	defer repo.Shutdown(context.Background())
    59  	migrator := NewMigrator(repo, logger)
    60  
    61  	schema := libschema.Schema{
    62  		Objects: &models.Schema{
    63  			Classes: []*models.Class{updateTestClass()},
    64  		},
    65  	}
    66  
    67  	t.Run("add schema", func(t *testing.T) {
    68  		err := migrator.AddClass(context.Background(), updateTestClass(), schemaGetter.shardState)
    69  		require.Nil(t, err)
    70  	})
    71  	schemaGetter.schema = schema
    72  
    73  	t.Run("import some objects", func(t *testing.T) {
    74  		for _, res := range updateTestData() {
    75  			err := repo.PutObject(context.Background(), res.Object(), res.Vector, nil, nil)
    76  			require.Nil(t, err)
    77  		}
    78  
    79  		tracker := getTracker(repo, "UpdateTestClass")
    80  
    81  		require.Nil(t, err)
    82  
    83  		sum, count, mean, err := tracker.PropertyTally("name")
    84  		require.Nil(t, err)
    85  		assert.Equal(t, 4, sum)
    86  		assert.Equal(t, 4, count)
    87  		assert.InEpsilon(t, 1, mean, 0.1)
    88  	})
    89  
    90  	searchVector := []float32{0.1, 0.1, 0.1}
    91  
    92  	t.Run("verify vector search results are initially as expected",
    93  		func(t *testing.T) {
    94  			res, err := repo.VectorSearch(context.Background(), dto.GetParams{
    95  				ClassName:    "UpdateTestClass",
    96  				SearchVector: searchVector,
    97  				Pagination: &filters.Pagination{
    98  					Limit: 100,
    99  				},
   100  			})
   101  
   102  			expectedInAnyOrder := []interface{}{
   103  				"element-0", "element-1", "element-2", "element-3",
   104  			}
   105  
   106  			require.Nil(t, err)
   107  			require.Len(t, res, 4)
   108  			assert.ElementsMatch(t, expectedInAnyOrder, extractPropValues(res, "name"))
   109  		})
   110  
   111  	searchInv := func(t *testing.T, op filters.Operator, value int) []interface{} {
   112  		res, err := repo.ObjectSearch(context.Background(), 0, 100,
   113  			&filters.LocalFilter{
   114  				Root: &filters.Clause{
   115  					Operator: op,
   116  					On: &filters.Path{
   117  						Class:    "UpdateTestClass",
   118  						Property: libschema.PropertyName("intProp"),
   119  					},
   120  					Value: &filters.Value{
   121  						Type:  libschema.DataTypeInt,
   122  						Value: value,
   123  					},
   124  				},
   125  			}, nil, additional.Properties{}, "")
   126  		require.Nil(t, err)
   127  		return extractPropValues(res, "name")
   128  	}
   129  
   130  	t.Run("verify invert index results are initially as expected",
   131  		func(t *testing.T) {
   132  			expectedInAnyOrder := []interface{}{
   133  				"element-0", "element-1", "element-2", "element-3",
   134  			}
   135  			assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorGreaterThanEqual, 0))
   136  
   137  			expectedInAnyOrder = []interface{}{"element-0"}
   138  			assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 0))
   139  
   140  			expectedInAnyOrder = []interface{}{"element-1"}
   141  			assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 10))
   142  
   143  			expectedInAnyOrder = []interface{}{"element-2"}
   144  			assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 20))
   145  
   146  			expectedInAnyOrder = []interface{}{"element-3"}
   147  			assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 30))
   148  		})
   149  
   150  	t.Run("update vector position of one item to move it into a different direction",
   151  		func(t *testing.T) {
   152  			// updating element-0 to be very far away from our search vector
   153  			updatedVec := []float32{-0.1, -0.12, -0.105}
   154  			id := updateTestData()[0].ID
   155  
   156  			old, err := repo.ObjectByID(context.Background(), id, search.SelectProperties{}, additional.Properties{}, "")
   157  			require.Nil(t, err)
   158  
   159  			err = repo.PutObject(context.Background(), old.Object(), updatedVec, nil, nil)
   160  			require.Nil(t, err)
   161  
   162  			tracker := getTracker(repo, "UpdateTestClass")
   163  
   164  			require.Nil(t, err)
   165  
   166  			sum, count, mean, err := tracker.PropertyTally("name")
   167  			require.Nil(t, err)
   168  			assert.Equal(t, 4, sum)
   169  			assert.Equal(t, 4, count)
   170  			assert.InEpsilon(t, 1, mean, 0.1)
   171  		})
   172  
   173  	t.Run("verify new vector search results are as expected", func(t *testing.T) {
   174  		res, err := repo.VectorSearch(context.Background(), dto.GetParams{
   175  			ClassName:    "UpdateTestClass",
   176  			SearchVector: searchVector,
   177  			Pagination: &filters.Pagination{
   178  				Limit: 100,
   179  			},
   180  		})
   181  
   182  		expectedInAnyOrder := []interface{}{
   183  			"element-0", "element-1", "element-2", "element-3",
   184  		}
   185  
   186  		require.Nil(t, err)
   187  		require.Len(t, res, 4)
   188  		assert.ElementsMatch(t, expectedInAnyOrder, extractPropValues(res, "name"))
   189  	})
   190  
   191  	t.Run("verify invert results still work properly", func(t *testing.T) {
   192  		expectedInAnyOrder := []interface{}{
   193  			"element-0", "element-1", "element-2", "element-3",
   194  		}
   195  		assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorGreaterThanEqual, 0))
   196  
   197  		expectedInAnyOrder = []interface{}{"element-0"}
   198  		assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 0))
   199  
   200  		expectedInAnyOrder = []interface{}{"element-1"}
   201  		assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 10))
   202  
   203  		expectedInAnyOrder = []interface{}{"element-2"}
   204  		assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 20))
   205  
   206  		expectedInAnyOrder = []interface{}{"element-3"}
   207  		assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 30))
   208  	})
   209  
   210  	t.Run("update a second object and modify vector and invert props at the same time",
   211  		func(t *testing.T) {
   212  			// this time we are updating element-2 and move it away from the search
   213  			// vector, as well as updating an invert prop
   214  
   215  			updatedVec := []float32{-0.1, -0.12, -0.105123}
   216  			id := updateTestData()[2].ID
   217  
   218  			old, err := repo.ObjectByID(context.Background(), id, search.SelectProperties{}, additional.Properties{}, "")
   219  			require.Nil(t, err)
   220  
   221  			old.Schema.(map[string]interface{})["intProp"] = int64(21)
   222  			err = repo.PutObject(context.Background(), old.Object(), updatedVec, nil, nil)
   223  			require.Nil(t, err)
   224  
   225  			tracker := getTracker(repo, "UpdateTestClass")
   226  
   227  			require.Nil(t, err)
   228  
   229  			sum, count, mean, err := tracker.PropertyTally("name")
   230  			require.Nil(t, err)
   231  			assert.Equal(t, 4, sum)
   232  			assert.Equal(t, 4, count)
   233  			assert.InEpsilon(t, 1, mean, 0.1)
   234  		})
   235  
   236  	t.Run("verify new vector search results are as expected", func(t *testing.T) {
   237  		res, err := repo.VectorSearch(context.Background(), dto.GetParams{
   238  			ClassName:    "UpdateTestClass",
   239  			SearchVector: searchVector,
   240  			Pagination: &filters.Pagination{
   241  				Limit: 100,
   242  			},
   243  		})
   244  
   245  		expectedInAnyOrder := []interface{}{
   246  			"element-0", "element-1", "element-2", "element-3",
   247  		}
   248  
   249  		require.Nil(t, err)
   250  		require.Len(t, res, 4)
   251  		assert.ElementsMatch(t, expectedInAnyOrder, extractPropValues(res, "name"))
   252  	})
   253  
   254  	t.Run("verify invert results have been updated correctly", func(t *testing.T) {
   255  		expectedInAnyOrder := []interface{}{
   256  			"element-0", "element-1", "element-2", "element-3",
   257  		}
   258  		assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorGreaterThanEqual, 0))
   259  
   260  		expectedInAnyOrder = []interface{}{"element-0"}
   261  		assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 0))
   262  
   263  		expectedInAnyOrder = []interface{}{"element-1"}
   264  		assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 10))
   265  
   266  		expectedInAnyOrder = []interface{}{} // value is no longer 20, but 21
   267  		assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 20))
   268  
   269  		expectedInAnyOrder = []interface{}{"element-2"}
   270  		assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 21))
   271  
   272  		expectedInAnyOrder = []interface{}{"element-3"}
   273  		assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 30))
   274  	})
   275  
   276  	t.Run("test recount", func(t *testing.T) {
   277  		tracker := getTracker(repo, "UpdateTestClass")
   278  
   279  		require.Nil(t, err)
   280  
   281  		sum, count, mean, err := tracker.PropertyTally("name")
   282  		require.Nil(t, err)
   283  		assert.Equal(t, 4, sum)
   284  		assert.Equal(t, 4, count)
   285  		assert.InEpsilon(t, 1, mean, 0.1)
   286  
   287  		tracker.Clear()
   288  		sum, count, mean, err = tracker.PropertyTally("name")
   289  		require.Nil(t, err)
   290  		assert.Equal(t, 0, sum)
   291  		assert.Equal(t, 0, count)
   292  		assert.Equal(t, float64(0), mean)
   293  
   294  		logger := logrus.New()
   295  		migrator := NewMigrator(repo, logger)
   296  		migrator.RecountProperties(context.Background())
   297  
   298  		sum, count, mean, err = tracker.PropertyTally("name")
   299  		require.Nil(t, err)
   300  		assert.Equal(t, 4, sum)
   301  		assert.Equal(t, 4, count)
   302  		assert.Equal(t, float64(1), mean)
   303  	})
   304  }
   305  
   306  func updateTestClass() *models.Class {
   307  	return &models.Class{
   308  		Class:             "UpdateTestClass",
   309  		VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
   310  		InvertedIndexConfig: &models.InvertedIndexConfig{
   311  			CleanupIntervalSeconds: 3,
   312  		},
   313  		Properties: []*models.Property{
   314  			{
   315  				DataType: []string{string(schema.DataTypeInt)},
   316  				Name:     "intProp",
   317  			},
   318  			{
   319  				DataType:     schema.DataTypeText.PropString(),
   320  				Tokenization: models.PropertyTokenizationWhitespace,
   321  				Name:         "name",
   322  			},
   323  		},
   324  	}
   325  }
   326  
   327  func updateTestData() search.Results {
   328  	return search.Results{
   329  		search.Result{
   330  			ClassName: "UpdateTestClass",
   331  			ID:        "426b0b29-9ded-40b6-b786-da3d1fec412f",
   332  			Schema: map[string]interface{}{
   333  				"intProp": int64(0),
   334  				"name":    "element-0",
   335  			},
   336  			Vector: []float32{0.89379513, 0.67022973, 0.57360715},
   337  		},
   338  		search.Result{
   339  			ClassName: "UpdateTestClass",
   340  			ID:        "a1560f12-f0f0-4439-b5b8-b7bcecf5fed7",
   341  
   342  			Schema: map[string]interface{}{
   343  				"intProp": int64(10),
   344  				"name":    "element-1",
   345  			},
   346  			Vector: []float32{0.9660323, 0.35887036, 0.6072966},
   347  		},
   348  		search.Result{
   349  			ClassName: "UpdateTestClass",
   350  			ID:        "0c73f145-5dc4-49a9-bd58-82725f8b13fa",
   351  
   352  			Schema: map[string]interface{}{
   353  				"intProp": int64(20),
   354  				"name":    "element-2",
   355  			},
   356  			Vector: []float32{0.8194746, 0.56142205, 0.5130103},
   357  		},
   358  		search.Result{
   359  			ClassName: "UpdateTestClass",
   360  			ID:        "aec8462e-276a-4989-a612-8314c35d163a",
   361  			Schema: map[string]interface{}{
   362  				"intProp": int64(30),
   363  				"name":    "element-3",
   364  			},
   365  			Vector: []float32{0.42401955, 0.8278863, 0.5952888},
   366  		},
   367  	}
   368  }
   369  
   370  func extractPropValues(in search.Results, propName string) []interface{} {
   371  	out := make([]interface{}, len(in))
   372  
   373  	for i, res := range in {
   374  		out[i] = res.Schema.(map[string]interface{})[propName]
   375  	}
   376  
   377  	return out
   378  }
   379  
   380  func getTracker(repo *DB, className string) *inverted.JsonPropertyLengthTracker {
   381  	index := repo.GetIndex("UpdateTestClass")
   382  	var shard ShardLike
   383  	index.ForEachShard(func(name string, shardv ShardLike) error {
   384  		shard = shardv
   385  		return nil
   386  	})
   387  
   388  	tracker := shard.GetPropertyLengthTracker()
   389  
   390  	return tracker
   391  }