github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/crud_update_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 // +build integrationTest 14 15 package db 16 17 import ( 18 "context" 19 "testing" 20 21 "github.com/sirupsen/logrus" 22 "github.com/stretchr/testify/assert" 23 "github.com/stretchr/testify/require" 24 "github.com/weaviate/weaviate/adapters/repos/db/inverted" 25 "github.com/weaviate/weaviate/entities/additional" 26 "github.com/weaviate/weaviate/entities/dto" 27 "github.com/weaviate/weaviate/entities/filters" 28 "github.com/weaviate/weaviate/entities/models" 29 "github.com/weaviate/weaviate/entities/schema" 30 libschema "github.com/weaviate/weaviate/entities/schema" 31 "github.com/weaviate/weaviate/entities/search" 32 enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 33 ) 34 35 // Updates are non trivial, because vector indices are built under the 36 // assumption that items are immutable (this is true for HNSW, the assumption 37 // is that this is generally true in the majority of cases). Therefore an 38 // update is essentially a delete and a new import with a new doc ID. This 39 // needs to be tested extensively because there's a lot of room for error 40 // regarding the clean up of Doc ID pointers in the inverted indices, etc. 41 func TestUpdateJourney(t *testing.T) { 42 dirName := t.TempDir() 43 44 logger := logrus.New() 45 schemaGetter := &fakeSchemaGetter{ 46 schema: schema.Schema{Objects: &models.Schema{Classes: nil}}, 47 shardState: singleShardState(), 48 } 49 repo, err := New(logger, Config{ 50 MemtablesFlushDirtyAfter: 60, 51 RootPath: dirName, 52 QueryMaximumResults: 10000, 53 MaxImportGoroutinesFactor: 1, 54 }, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil) 55 require.Nil(t, err) 56 repo.SetSchemaGetter(schemaGetter) 57 require.Nil(t, repo.WaitForStartup(testCtx())) 58 defer repo.Shutdown(context.Background()) 59 migrator := NewMigrator(repo, logger) 60 61 schema := libschema.Schema{ 62 Objects: &models.Schema{ 63 Classes: []*models.Class{updateTestClass()}, 64 }, 65 } 66 67 t.Run("add schema", func(t *testing.T) { 68 err := migrator.AddClass(context.Background(), updateTestClass(), schemaGetter.shardState) 69 require.Nil(t, err) 70 }) 71 schemaGetter.schema = schema 72 73 t.Run("import some objects", func(t *testing.T) { 74 for _, res := range updateTestData() { 75 err := repo.PutObject(context.Background(), res.Object(), res.Vector, nil, nil) 76 require.Nil(t, err) 77 } 78 79 tracker := getTracker(repo, "UpdateTestClass") 80 81 require.Nil(t, err) 82 83 sum, count, mean, err := tracker.PropertyTally("name") 84 require.Nil(t, err) 85 assert.Equal(t, 4, sum) 86 assert.Equal(t, 4, count) 87 assert.InEpsilon(t, 1, mean, 0.1) 88 }) 89 90 searchVector := []float32{0.1, 0.1, 0.1} 91 92 t.Run("verify vector search results are initially as expected", 93 func(t *testing.T) { 94 res, err := repo.VectorSearch(context.Background(), dto.GetParams{ 95 ClassName: "UpdateTestClass", 96 SearchVector: searchVector, 97 Pagination: &filters.Pagination{ 98 Limit: 100, 99 }, 100 }) 101 102 expectedInAnyOrder := []interface{}{ 103 "element-0", "element-1", "element-2", "element-3", 104 } 105 106 require.Nil(t, err) 107 require.Len(t, res, 4) 108 assert.ElementsMatch(t, expectedInAnyOrder, extractPropValues(res, "name")) 109 }) 110 111 searchInv := func(t *testing.T, op filters.Operator, value int) []interface{} { 112 res, err := repo.ObjectSearch(context.Background(), 0, 100, 113 &filters.LocalFilter{ 114 Root: &filters.Clause{ 115 Operator: op, 116 On: &filters.Path{ 117 Class: "UpdateTestClass", 118 Property: libschema.PropertyName("intProp"), 119 }, 120 Value: &filters.Value{ 121 Type: libschema.DataTypeInt, 122 Value: value, 123 }, 124 }, 125 }, nil, additional.Properties{}, "") 126 require.Nil(t, err) 127 return extractPropValues(res, "name") 128 } 129 130 t.Run("verify invert index results are initially as expected", 131 func(t *testing.T) { 132 expectedInAnyOrder := []interface{}{ 133 "element-0", "element-1", "element-2", "element-3", 134 } 135 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorGreaterThanEqual, 0)) 136 137 expectedInAnyOrder = []interface{}{"element-0"} 138 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 0)) 139 140 expectedInAnyOrder = []interface{}{"element-1"} 141 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 10)) 142 143 expectedInAnyOrder = []interface{}{"element-2"} 144 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 20)) 145 146 expectedInAnyOrder = []interface{}{"element-3"} 147 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 30)) 148 }) 149 150 t.Run("update vector position of one item to move it into a different direction", 151 func(t *testing.T) { 152 // updating element-0 to be very far away from our search vector 153 updatedVec := []float32{-0.1, -0.12, -0.105} 154 id := updateTestData()[0].ID 155 156 old, err := repo.ObjectByID(context.Background(), id, search.SelectProperties{}, additional.Properties{}, "") 157 require.Nil(t, err) 158 159 err = repo.PutObject(context.Background(), old.Object(), updatedVec, nil, nil) 160 require.Nil(t, err) 161 162 tracker := getTracker(repo, "UpdateTestClass") 163 164 require.Nil(t, err) 165 166 sum, count, mean, err := tracker.PropertyTally("name") 167 require.Nil(t, err) 168 assert.Equal(t, 4, sum) 169 assert.Equal(t, 4, count) 170 assert.InEpsilon(t, 1, mean, 0.1) 171 }) 172 173 t.Run("verify new vector search results are as expected", func(t *testing.T) { 174 res, err := repo.VectorSearch(context.Background(), dto.GetParams{ 175 ClassName: "UpdateTestClass", 176 SearchVector: searchVector, 177 Pagination: &filters.Pagination{ 178 Limit: 100, 179 }, 180 }) 181 182 expectedInAnyOrder := []interface{}{ 183 "element-0", "element-1", "element-2", "element-3", 184 } 185 186 require.Nil(t, err) 187 require.Len(t, res, 4) 188 assert.ElementsMatch(t, expectedInAnyOrder, extractPropValues(res, "name")) 189 }) 190 191 t.Run("verify invert results still work properly", func(t *testing.T) { 192 expectedInAnyOrder := []interface{}{ 193 "element-0", "element-1", "element-2", "element-3", 194 } 195 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorGreaterThanEqual, 0)) 196 197 expectedInAnyOrder = []interface{}{"element-0"} 198 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 0)) 199 200 expectedInAnyOrder = []interface{}{"element-1"} 201 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 10)) 202 203 expectedInAnyOrder = []interface{}{"element-2"} 204 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 20)) 205 206 expectedInAnyOrder = []interface{}{"element-3"} 207 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 30)) 208 }) 209 210 t.Run("update a second object and modify vector and invert props at the same time", 211 func(t *testing.T) { 212 // this time we are updating element-2 and move it away from the search 213 // vector, as well as updating an invert prop 214 215 updatedVec := []float32{-0.1, -0.12, -0.105123} 216 id := updateTestData()[2].ID 217 218 old, err := repo.ObjectByID(context.Background(), id, search.SelectProperties{}, additional.Properties{}, "") 219 require.Nil(t, err) 220 221 old.Schema.(map[string]interface{})["intProp"] = int64(21) 222 err = repo.PutObject(context.Background(), old.Object(), updatedVec, nil, nil) 223 require.Nil(t, err) 224 225 tracker := getTracker(repo, "UpdateTestClass") 226 227 require.Nil(t, err) 228 229 sum, count, mean, err := tracker.PropertyTally("name") 230 require.Nil(t, err) 231 assert.Equal(t, 4, sum) 232 assert.Equal(t, 4, count) 233 assert.InEpsilon(t, 1, mean, 0.1) 234 }) 235 236 t.Run("verify new vector search results are as expected", func(t *testing.T) { 237 res, err := repo.VectorSearch(context.Background(), dto.GetParams{ 238 ClassName: "UpdateTestClass", 239 SearchVector: searchVector, 240 Pagination: &filters.Pagination{ 241 Limit: 100, 242 }, 243 }) 244 245 expectedInAnyOrder := []interface{}{ 246 "element-0", "element-1", "element-2", "element-3", 247 } 248 249 require.Nil(t, err) 250 require.Len(t, res, 4) 251 assert.ElementsMatch(t, expectedInAnyOrder, extractPropValues(res, "name")) 252 }) 253 254 t.Run("verify invert results have been updated correctly", func(t *testing.T) { 255 expectedInAnyOrder := []interface{}{ 256 "element-0", "element-1", "element-2", "element-3", 257 } 258 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorGreaterThanEqual, 0)) 259 260 expectedInAnyOrder = []interface{}{"element-0"} 261 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 0)) 262 263 expectedInAnyOrder = []interface{}{"element-1"} 264 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 10)) 265 266 expectedInAnyOrder = []interface{}{} // value is no longer 20, but 21 267 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 20)) 268 269 expectedInAnyOrder = []interface{}{"element-2"} 270 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 21)) 271 272 expectedInAnyOrder = []interface{}{"element-3"} 273 assert.ElementsMatch(t, expectedInAnyOrder, searchInv(t, filters.OperatorEqual, 30)) 274 }) 275 276 t.Run("test recount", func(t *testing.T) { 277 tracker := getTracker(repo, "UpdateTestClass") 278 279 require.Nil(t, err) 280 281 sum, count, mean, err := tracker.PropertyTally("name") 282 require.Nil(t, err) 283 assert.Equal(t, 4, sum) 284 assert.Equal(t, 4, count) 285 assert.InEpsilon(t, 1, mean, 0.1) 286 287 tracker.Clear() 288 sum, count, mean, err = tracker.PropertyTally("name") 289 require.Nil(t, err) 290 assert.Equal(t, 0, sum) 291 assert.Equal(t, 0, count) 292 assert.Equal(t, float64(0), mean) 293 294 logger := logrus.New() 295 migrator := NewMigrator(repo, logger) 296 migrator.RecountProperties(context.Background()) 297 298 sum, count, mean, err = tracker.PropertyTally("name") 299 require.Nil(t, err) 300 assert.Equal(t, 4, sum) 301 assert.Equal(t, 4, count) 302 assert.Equal(t, float64(1), mean) 303 }) 304 } 305 306 func updateTestClass() *models.Class { 307 return &models.Class{ 308 Class: "UpdateTestClass", 309 VectorIndexConfig: enthnsw.NewDefaultUserConfig(), 310 InvertedIndexConfig: &models.InvertedIndexConfig{ 311 CleanupIntervalSeconds: 3, 312 }, 313 Properties: []*models.Property{ 314 { 315 DataType: []string{string(schema.DataTypeInt)}, 316 Name: "intProp", 317 }, 318 { 319 DataType: schema.DataTypeText.PropString(), 320 Tokenization: models.PropertyTokenizationWhitespace, 321 Name: "name", 322 }, 323 }, 324 } 325 } 326 327 func updateTestData() search.Results { 328 return search.Results{ 329 search.Result{ 330 ClassName: "UpdateTestClass", 331 ID: "426b0b29-9ded-40b6-b786-da3d1fec412f", 332 Schema: map[string]interface{}{ 333 "intProp": int64(0), 334 "name": "element-0", 335 }, 336 Vector: []float32{0.89379513, 0.67022973, 0.57360715}, 337 }, 338 search.Result{ 339 ClassName: "UpdateTestClass", 340 ID: "a1560f12-f0f0-4439-b5b8-b7bcecf5fed7", 341 342 Schema: map[string]interface{}{ 343 "intProp": int64(10), 344 "name": "element-1", 345 }, 346 Vector: []float32{0.9660323, 0.35887036, 0.6072966}, 347 }, 348 search.Result{ 349 ClassName: "UpdateTestClass", 350 ID: "0c73f145-5dc4-49a9-bd58-82725f8b13fa", 351 352 Schema: map[string]interface{}{ 353 "intProp": int64(20), 354 "name": "element-2", 355 }, 356 Vector: []float32{0.8194746, 0.56142205, 0.5130103}, 357 }, 358 search.Result{ 359 ClassName: "UpdateTestClass", 360 ID: "aec8462e-276a-4989-a612-8314c35d163a", 361 Schema: map[string]interface{}{ 362 "intProp": int64(30), 363 "name": "element-3", 364 }, 365 Vector: []float32{0.42401955, 0.8278863, 0.5952888}, 366 }, 367 } 368 } 369 370 func extractPropValues(in search.Results, propName string) []interface{} { 371 out := make([]interface{}, len(in)) 372 373 for i, res := range in { 374 out[i] = res.Schema.(map[string]interface{})[propName] 375 } 376 377 return out 378 } 379 380 func getTracker(repo *DB, className string) *inverted.JsonPropertyLengthTracker { 381 index := repo.GetIndex("UpdateTestClass") 382 var shard ShardLike 383 index.ForEachShard(func(name string, shardv ShardLike) error { 384 shard = shardv 385 return nil 386 }) 387 388 tracker := shard.GetPropertyLengthTracker() 389 390 return tracker 391 }