github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/index_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTest 13 // +build integrationTest 14 15 package db 16 17 import ( 18 "context" 19 "os" 20 "path" 21 "testing" 22 23 "github.com/go-openapi/strfmt" 24 "github.com/sirupsen/logrus/hooks/test" 25 "github.com/stretchr/testify/assert" 26 "github.com/stretchr/testify/require" 27 "github.com/weaviate/weaviate/adapters/repos/db/inverted" 28 "github.com/weaviate/weaviate/entities/additional" 29 "github.com/weaviate/weaviate/entities/models" 30 "github.com/weaviate/weaviate/entities/schema" 31 "github.com/weaviate/weaviate/entities/storagestate" 32 "github.com/weaviate/weaviate/entities/storobj" 33 "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 34 ) 35 36 func TestIndex_DropIndex(t *testing.T) { 37 dirName := t.TempDir() 38 class := &models.Class{Class: "deletetest"} 39 index := emptyIdx(t, dirName, class) 40 41 indexFilesBeforeDelete, err := getIndexFilenames(dirName, class.Class) 42 require.Nil(t, err) 43 44 err = index.drop() 45 require.Nil(t, err) 46 47 indexFilesAfterDelete, err := getIndexFilenames(dirName, class.Class) 48 require.Nil(t, err) 49 50 assert.Equal(t, 6, len(indexFilesBeforeDelete)) 51 assert.Equal(t, 0, len(indexFilesAfterDelete)) 52 } 53 54 func TestIndex_DropEmptyAndRecreateEmptyIndex(t *testing.T) { 55 dirName := t.TempDir() 56 class := &models.Class{Class: "deletetest"} 57 index := emptyIdx(t, dirName, class) 58 59 indexFilesBeforeDelete, err := getIndexFilenames(dirName, class.Class) 60 require.Nil(t, err) 61 62 // drop the index 63 err = index.drop() 64 require.Nil(t, err) 65 66 indexFilesAfterDelete, err := getIndexFilenames(dirName, class.Class) 67 require.Nil(t, err) 68 69 index = emptyIdx(t, dirName, class) 70 71 indexFilesAfterRecreate, err := getIndexFilenames(dirName, class.Class) 72 require.Nil(t, err) 73 74 assert.Equal(t, 6, len(indexFilesBeforeDelete)) 75 assert.Equal(t, 0, len(indexFilesAfterDelete)) 76 assert.Equal(t, 6, len(indexFilesAfterRecreate)) 77 78 err = index.drop() 79 require.Nil(t, err) 80 } 81 82 func TestIndex_DropWithDataAndRecreateWithDataIndex(t *testing.T) { 83 dirName := t.TempDir() 84 logger, _ := test.NewNullLogger() 85 class := &models.Class{ 86 Class: "deletetest", 87 Properties: []*models.Property{ 88 { 89 Name: "name", 90 DataType: schema.DataTypeText.PropString(), 91 Tokenization: models.PropertyTokenizationWhitespace, 92 }, 93 }, 94 InvertedIndexConfig: &models.InvertedIndexConfig{}, 95 } 96 fakeSchema := schema.Schema{ 97 Objects: &models.Schema{ 98 Classes: []*models.Class{ 99 class, 100 }, 101 }, 102 } 103 // create index with data 104 shardState := singleShardState() 105 index, err := NewIndex(testCtx(), IndexConfig{ 106 RootPath: dirName, 107 ClassName: schema.ClassName(class.Class), 108 }, shardState, inverted.ConfigFromModel(class.InvertedIndexConfig), 109 hnsw.NewDefaultUserConfig(), nil, &fakeSchemaGetter{ 110 schema: fakeSchema, shardState: shardState, 111 }, nil, logger, nil, nil, nil, nil, class, nil, nil) 112 require.Nil(t, err) 113 114 productsIds := []strfmt.UUID{ 115 "1295c052-263d-4aae-99dd-920c5a370d06", 116 "1295c052-263d-4aae-99dd-920c5a370d07", 117 } 118 119 products := []map[string]interface{}{ 120 {"name": "one"}, 121 {"name": "two"}, 122 } 123 124 err = index.addUUIDProperty(context.TODO()) 125 require.Nil(t, err) 126 127 err = index.addProperty(context.TODO(), &models.Property{ 128 Name: "name", 129 DataType: schema.DataTypeText.PropString(), 130 Tokenization: models.PropertyTokenizationWhitespace, 131 }) 132 require.Nil(t, err) 133 134 for i, p := range products { 135 product := models.Object{ 136 Class: class.Class, 137 ID: productsIds[i], 138 Properties: p, 139 } 140 141 err := index.putObject(context.TODO(), storobj.FromObject( 142 &product, []float32{0.1, 0.2, 0.01, 0.2}, nil), nil) 143 require.Nil(t, err) 144 } 145 146 indexFilesBeforeDelete, err := getIndexFilenames(dirName, class.Class) 147 require.Nil(t, err) 148 149 beforeDeleteObj1, err := index.objectByID(context.TODO(), 150 productsIds[0], nil, additional.Properties{}, nil, "") 151 require.Nil(t, err) 152 153 beforeDeleteObj2, err := index.objectByID(context.TODO(), 154 productsIds[1], nil, additional.Properties{}, nil, "") 155 require.Nil(t, err) 156 157 // drop the index 158 err = index.drop() 159 require.Nil(t, err) 160 161 indexFilesAfterDelete, err := getIndexFilenames(dirName, class.Class) 162 require.Nil(t, err) 163 164 // recreate the index 165 index, err = NewIndex(testCtx(), IndexConfig{ 166 RootPath: dirName, 167 ClassName: schema.ClassName(class.Class), 168 }, shardState, inverted.ConfigFromModel(class.InvertedIndexConfig), 169 hnsw.NewDefaultUserConfig(), nil, &fakeSchemaGetter{ 170 schema: fakeSchema, 171 shardState: shardState, 172 }, nil, logger, nil, nil, nil, nil, class, nil, nil) 173 require.Nil(t, err) 174 175 err = index.addUUIDProperty(context.TODO()) 176 require.Nil(t, err) 177 err = index.addProperty(context.TODO(), &models.Property{ 178 Name: "name", 179 DataType: schema.DataTypeText.PropString(), 180 Tokenization: models.PropertyTokenizationWhitespace, 181 }) 182 require.Nil(t, err) 183 184 indexFilesAfterRecreate, err := getIndexFilenames(dirName, class.Class) 185 require.Nil(t, err) 186 187 afterRecreateObj1, err := index.objectByID(context.TODO(), 188 productsIds[0], nil, additional.Properties{}, nil, "") 189 require.Nil(t, err) 190 191 afterRecreateObj2, err := index.objectByID(context.TODO(), 192 productsIds[1], nil, additional.Properties{}, nil, "") 193 require.Nil(t, err) 194 195 // insert some data in the recreated index 196 for i, p := range products { 197 thing := models.Object{ 198 Class: class.Class, 199 ID: productsIds[i], 200 Properties: p, 201 } 202 203 err := index.putObject(context.TODO(), storobj.FromObject( 204 &thing, []float32{0.1, 0.2, 0.01, 0.2}, nil), nil) 205 require.Nil(t, err) 206 } 207 208 afterRecreateAndInsertObj1, err := index.objectByID(context.TODO(), 209 productsIds[0], nil, additional.Properties{}, nil, "") 210 require.Nil(t, err) 211 212 afterRecreateAndInsertObj2, err := index.objectByID(context.TODO(), 213 productsIds[1], nil, additional.Properties{}, nil, "") 214 require.Nil(t, err) 215 216 // update the index vectorIndexUserConfig 217 beforeVectorConfig, ok := index.vectorIndexUserConfig.(hnsw.UserConfig) 218 require.Equal(t, -1, beforeVectorConfig.EF) 219 require.True(t, ok) 220 beforeVectorConfig.EF = 99 221 err = index.updateVectorIndexConfig(context.TODO(), beforeVectorConfig) 222 require.Nil(t, err) 223 afterVectorConfig, ok := index.vectorIndexUserConfig.(hnsw.UserConfig) 224 require.True(t, ok) 225 require.Equal(t, 99, afterVectorConfig.EF) 226 227 assert.Equal(t, 6, len(indexFilesBeforeDelete)) 228 assert.Equal(t, 0, len(indexFilesAfterDelete)) 229 assert.Equal(t, 6, len(indexFilesAfterRecreate)) 230 assert.Equal(t, indexFilesBeforeDelete, indexFilesAfterRecreate) 231 assert.NotNil(t, beforeDeleteObj1) 232 assert.NotNil(t, beforeDeleteObj2) 233 assert.Empty(t, afterRecreateObj1) 234 assert.Empty(t, afterRecreateObj2) 235 assert.NotNil(t, afterRecreateAndInsertObj1) 236 assert.NotNil(t, afterRecreateAndInsertObj2) 237 } 238 239 func TestIndex_DropReadOnlyEmptyIndex(t *testing.T) { 240 ctx := testCtx() 241 class := &models.Class{Class: "deletetest"} 242 shard, index := testShard(t, ctx, class.Class) 243 244 err := index.updateShardStatus(ctx, shard.Name(), storagestate.StatusReadOnly.String()) 245 require.Nil(t, err) 246 247 err = index.drop() 248 require.Nil(t, err) 249 } 250 251 func TestIndex_DropReadOnlyIndexWithData(t *testing.T) { 252 ctx := testCtx() 253 dirName := t.TempDir() 254 logger, _ := test.NewNullLogger() 255 class := &models.Class{ 256 Class: "deletetest", 257 Properties: []*models.Property{ 258 { 259 Name: "name", 260 DataType: schema.DataTypeText.PropString(), 261 Tokenization: models.PropertyTokenizationWhitespace, 262 }, 263 }, 264 InvertedIndexConfig: &models.InvertedIndexConfig{}, 265 } 266 fakeSchema := schema.Schema{ 267 Objects: &models.Schema{ 268 Classes: []*models.Class{ 269 class, 270 }, 271 }, 272 } 273 274 shardState := singleShardState() 275 index, err := NewIndex(ctx, IndexConfig{ 276 RootPath: dirName, 277 ClassName: schema.ClassName(class.Class), 278 }, shardState, inverted.ConfigFromModel(class.InvertedIndexConfig), 279 hnsw.NewDefaultUserConfig(), nil, &fakeSchemaGetter{ 280 schema: fakeSchema, shardState: shardState, 281 }, nil, logger, nil, nil, nil, nil, class, nil, nil) 282 require.Nil(t, err) 283 284 productsIds := []strfmt.UUID{ 285 "1295c052-263d-4aae-99dd-920c5a370d06", 286 "1295c052-263d-4aae-99dd-920c5a370d07", 287 } 288 289 products := []map[string]interface{}{ 290 {"name": "one"}, 291 {"name": "two"}, 292 } 293 294 err = index.addUUIDProperty(ctx) 295 require.Nil(t, err) 296 297 err = index.addProperty(ctx, &models.Property{ 298 Name: "name", 299 DataType: schema.DataTypeText.PropString(), 300 Tokenization: models.PropertyTokenizationWhitespace, 301 }) 302 require.Nil(t, err) 303 304 for i, p := range products { 305 product := models.Object{ 306 Class: class.Class, 307 ID: productsIds[i], 308 Properties: p, 309 } 310 311 err := index.putObject(ctx, storobj.FromObject( 312 &product, []float32{0.1, 0.2, 0.01, 0.2}, nil), nil) 313 require.Nil(t, err) 314 } 315 316 // set all shards to readonly 317 index.ForEachShard(func(name string, shard ShardLike) error { 318 err = shard.UpdateStatus(storagestate.StatusReadOnly.String()) 319 require.Nil(t, err) 320 return nil 321 }) 322 323 err = index.drop() 324 require.Nil(t, err) 325 } 326 327 func emptyIdx(t *testing.T, rootDir string, class *models.Class) *Index { 328 logger, _ := test.NewNullLogger() 329 shardState := singleShardState() 330 331 idx, err := NewIndex(testCtx(), IndexConfig{ 332 RootPath: rootDir, 333 ClassName: schema.ClassName(class.Class), 334 DisableLazyLoadShards: true, 335 }, shardState, inverted.ConfigFromModel(invertedConfig()), 336 hnsw.NewDefaultUserConfig(), nil, &fakeSchemaGetter{ 337 shardState: shardState, 338 }, nil, logger, nil, nil, nil, nil, class, nil, nil) 339 require.Nil(t, err) 340 return idx 341 } 342 343 func invertedConfig() *models.InvertedIndexConfig { 344 return &models.InvertedIndexConfig{ 345 CleanupIntervalSeconds: 60, 346 Stopwords: &models.StopwordConfig{ 347 Preset: "none", 348 }, 349 IndexNullState: true, 350 IndexPropertyLength: true, 351 } 352 } 353 354 func getIndexFilenames(rootDir, indexName string) ([]string, error) { 355 var filenames []string 356 indexRoot, err := os.ReadDir(path.Join(rootDir, indexName)) 357 if err != nil { 358 if os.IsNotExist(err) { 359 // index was dropped, or never existed 360 return filenames, nil 361 } 362 return nil, err 363 } 364 shardFiles, err := os.ReadDir(path.Join(rootDir, indexName, indexRoot[0].Name())) 365 if err != nil { 366 return filenames, err 367 } 368 for _, f := range shardFiles { 369 filenames = append(filenames, f.Name()) 370 } 371 return filenames, nil 372 }