github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/index_integration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTest
    13  // +build integrationTest
    14  
    15  package db
    16  
    17  import (
    18  	"context"
    19  	"os"
    20  	"path"
    21  	"testing"
    22  
    23  	"github.com/go-openapi/strfmt"
    24  	"github.com/sirupsen/logrus/hooks/test"
    25  	"github.com/stretchr/testify/assert"
    26  	"github.com/stretchr/testify/require"
    27  	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
    28  	"github.com/weaviate/weaviate/entities/additional"
    29  	"github.com/weaviate/weaviate/entities/models"
    30  	"github.com/weaviate/weaviate/entities/schema"
    31  	"github.com/weaviate/weaviate/entities/storagestate"
    32  	"github.com/weaviate/weaviate/entities/storobj"
    33  	"github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    34  )
    35  
    36  func TestIndex_DropIndex(t *testing.T) {
    37  	dirName := t.TempDir()
    38  	class := &models.Class{Class: "deletetest"}
    39  	index := emptyIdx(t, dirName, class)
    40  
    41  	indexFilesBeforeDelete, err := getIndexFilenames(dirName, class.Class)
    42  	require.Nil(t, err)
    43  
    44  	err = index.drop()
    45  	require.Nil(t, err)
    46  
    47  	indexFilesAfterDelete, err := getIndexFilenames(dirName, class.Class)
    48  	require.Nil(t, err)
    49  
    50  	assert.Equal(t, 6, len(indexFilesBeforeDelete))
    51  	assert.Equal(t, 0, len(indexFilesAfterDelete))
    52  }
    53  
    54  func TestIndex_DropEmptyAndRecreateEmptyIndex(t *testing.T) {
    55  	dirName := t.TempDir()
    56  	class := &models.Class{Class: "deletetest"}
    57  	index := emptyIdx(t, dirName, class)
    58  
    59  	indexFilesBeforeDelete, err := getIndexFilenames(dirName, class.Class)
    60  	require.Nil(t, err)
    61  
    62  	// drop the index
    63  	err = index.drop()
    64  	require.Nil(t, err)
    65  
    66  	indexFilesAfterDelete, err := getIndexFilenames(dirName, class.Class)
    67  	require.Nil(t, err)
    68  
    69  	index = emptyIdx(t, dirName, class)
    70  
    71  	indexFilesAfterRecreate, err := getIndexFilenames(dirName, class.Class)
    72  	require.Nil(t, err)
    73  
    74  	assert.Equal(t, 6, len(indexFilesBeforeDelete))
    75  	assert.Equal(t, 0, len(indexFilesAfterDelete))
    76  	assert.Equal(t, 6, len(indexFilesAfterRecreate))
    77  
    78  	err = index.drop()
    79  	require.Nil(t, err)
    80  }
    81  
    82  func TestIndex_DropWithDataAndRecreateWithDataIndex(t *testing.T) {
    83  	dirName := t.TempDir()
    84  	logger, _ := test.NewNullLogger()
    85  	class := &models.Class{
    86  		Class: "deletetest",
    87  		Properties: []*models.Property{
    88  			{
    89  				Name:         "name",
    90  				DataType:     schema.DataTypeText.PropString(),
    91  				Tokenization: models.PropertyTokenizationWhitespace,
    92  			},
    93  		},
    94  		InvertedIndexConfig: &models.InvertedIndexConfig{},
    95  	}
    96  	fakeSchema := schema.Schema{
    97  		Objects: &models.Schema{
    98  			Classes: []*models.Class{
    99  				class,
   100  			},
   101  		},
   102  	}
   103  	// create index with data
   104  	shardState := singleShardState()
   105  	index, err := NewIndex(testCtx(), IndexConfig{
   106  		RootPath:  dirName,
   107  		ClassName: schema.ClassName(class.Class),
   108  	}, shardState, inverted.ConfigFromModel(class.InvertedIndexConfig),
   109  		hnsw.NewDefaultUserConfig(), nil, &fakeSchemaGetter{
   110  			schema: fakeSchema, shardState: shardState,
   111  		}, nil, logger, nil, nil, nil, nil, class, nil, nil)
   112  	require.Nil(t, err)
   113  
   114  	productsIds := []strfmt.UUID{
   115  		"1295c052-263d-4aae-99dd-920c5a370d06",
   116  		"1295c052-263d-4aae-99dd-920c5a370d07",
   117  	}
   118  
   119  	products := []map[string]interface{}{
   120  		{"name": "one"},
   121  		{"name": "two"},
   122  	}
   123  
   124  	err = index.addUUIDProperty(context.TODO())
   125  	require.Nil(t, err)
   126  
   127  	err = index.addProperty(context.TODO(), &models.Property{
   128  		Name:         "name",
   129  		DataType:     schema.DataTypeText.PropString(),
   130  		Tokenization: models.PropertyTokenizationWhitespace,
   131  	})
   132  	require.Nil(t, err)
   133  
   134  	for i, p := range products {
   135  		product := models.Object{
   136  			Class:      class.Class,
   137  			ID:         productsIds[i],
   138  			Properties: p,
   139  		}
   140  
   141  		err := index.putObject(context.TODO(), storobj.FromObject(
   142  			&product, []float32{0.1, 0.2, 0.01, 0.2}, nil), nil)
   143  		require.Nil(t, err)
   144  	}
   145  
   146  	indexFilesBeforeDelete, err := getIndexFilenames(dirName, class.Class)
   147  	require.Nil(t, err)
   148  
   149  	beforeDeleteObj1, err := index.objectByID(context.TODO(),
   150  		productsIds[0], nil, additional.Properties{}, nil, "")
   151  	require.Nil(t, err)
   152  
   153  	beforeDeleteObj2, err := index.objectByID(context.TODO(),
   154  		productsIds[1], nil, additional.Properties{}, nil, "")
   155  	require.Nil(t, err)
   156  
   157  	// drop the index
   158  	err = index.drop()
   159  	require.Nil(t, err)
   160  
   161  	indexFilesAfterDelete, err := getIndexFilenames(dirName, class.Class)
   162  	require.Nil(t, err)
   163  
   164  	// recreate the index
   165  	index, err = NewIndex(testCtx(), IndexConfig{
   166  		RootPath:  dirName,
   167  		ClassName: schema.ClassName(class.Class),
   168  	}, shardState, inverted.ConfigFromModel(class.InvertedIndexConfig),
   169  		hnsw.NewDefaultUserConfig(), nil, &fakeSchemaGetter{
   170  			schema:     fakeSchema,
   171  			shardState: shardState,
   172  		}, nil, logger, nil, nil, nil, nil, class, nil, nil)
   173  	require.Nil(t, err)
   174  
   175  	err = index.addUUIDProperty(context.TODO())
   176  	require.Nil(t, err)
   177  	err = index.addProperty(context.TODO(), &models.Property{
   178  		Name:         "name",
   179  		DataType:     schema.DataTypeText.PropString(),
   180  		Tokenization: models.PropertyTokenizationWhitespace,
   181  	})
   182  	require.Nil(t, err)
   183  
   184  	indexFilesAfterRecreate, err := getIndexFilenames(dirName, class.Class)
   185  	require.Nil(t, err)
   186  
   187  	afterRecreateObj1, err := index.objectByID(context.TODO(),
   188  		productsIds[0], nil, additional.Properties{}, nil, "")
   189  	require.Nil(t, err)
   190  
   191  	afterRecreateObj2, err := index.objectByID(context.TODO(),
   192  		productsIds[1], nil, additional.Properties{}, nil, "")
   193  	require.Nil(t, err)
   194  
   195  	// insert some data in the recreated index
   196  	for i, p := range products {
   197  		thing := models.Object{
   198  			Class:      class.Class,
   199  			ID:         productsIds[i],
   200  			Properties: p,
   201  		}
   202  
   203  		err := index.putObject(context.TODO(), storobj.FromObject(
   204  			&thing, []float32{0.1, 0.2, 0.01, 0.2}, nil), nil)
   205  		require.Nil(t, err)
   206  	}
   207  
   208  	afterRecreateAndInsertObj1, err := index.objectByID(context.TODO(),
   209  		productsIds[0], nil, additional.Properties{}, nil, "")
   210  	require.Nil(t, err)
   211  
   212  	afterRecreateAndInsertObj2, err := index.objectByID(context.TODO(),
   213  		productsIds[1], nil, additional.Properties{}, nil, "")
   214  	require.Nil(t, err)
   215  
   216  	// update the index vectorIndexUserConfig
   217  	beforeVectorConfig, ok := index.vectorIndexUserConfig.(hnsw.UserConfig)
   218  	require.Equal(t, -1, beforeVectorConfig.EF)
   219  	require.True(t, ok)
   220  	beforeVectorConfig.EF = 99
   221  	err = index.updateVectorIndexConfig(context.TODO(), beforeVectorConfig)
   222  	require.Nil(t, err)
   223  	afterVectorConfig, ok := index.vectorIndexUserConfig.(hnsw.UserConfig)
   224  	require.True(t, ok)
   225  	require.Equal(t, 99, afterVectorConfig.EF)
   226  
   227  	assert.Equal(t, 6, len(indexFilesBeforeDelete))
   228  	assert.Equal(t, 0, len(indexFilesAfterDelete))
   229  	assert.Equal(t, 6, len(indexFilesAfterRecreate))
   230  	assert.Equal(t, indexFilesBeforeDelete, indexFilesAfterRecreate)
   231  	assert.NotNil(t, beforeDeleteObj1)
   232  	assert.NotNil(t, beforeDeleteObj2)
   233  	assert.Empty(t, afterRecreateObj1)
   234  	assert.Empty(t, afterRecreateObj2)
   235  	assert.NotNil(t, afterRecreateAndInsertObj1)
   236  	assert.NotNil(t, afterRecreateAndInsertObj2)
   237  }
   238  
   239  func TestIndex_DropReadOnlyEmptyIndex(t *testing.T) {
   240  	ctx := testCtx()
   241  	class := &models.Class{Class: "deletetest"}
   242  	shard, index := testShard(t, ctx, class.Class)
   243  
   244  	err := index.updateShardStatus(ctx, shard.Name(), storagestate.StatusReadOnly.String())
   245  	require.Nil(t, err)
   246  
   247  	err = index.drop()
   248  	require.Nil(t, err)
   249  }
   250  
   251  func TestIndex_DropReadOnlyIndexWithData(t *testing.T) {
   252  	ctx := testCtx()
   253  	dirName := t.TempDir()
   254  	logger, _ := test.NewNullLogger()
   255  	class := &models.Class{
   256  		Class: "deletetest",
   257  		Properties: []*models.Property{
   258  			{
   259  				Name:         "name",
   260  				DataType:     schema.DataTypeText.PropString(),
   261  				Tokenization: models.PropertyTokenizationWhitespace,
   262  			},
   263  		},
   264  		InvertedIndexConfig: &models.InvertedIndexConfig{},
   265  	}
   266  	fakeSchema := schema.Schema{
   267  		Objects: &models.Schema{
   268  			Classes: []*models.Class{
   269  				class,
   270  			},
   271  		},
   272  	}
   273  
   274  	shardState := singleShardState()
   275  	index, err := NewIndex(ctx, IndexConfig{
   276  		RootPath:  dirName,
   277  		ClassName: schema.ClassName(class.Class),
   278  	}, shardState, inverted.ConfigFromModel(class.InvertedIndexConfig),
   279  		hnsw.NewDefaultUserConfig(), nil, &fakeSchemaGetter{
   280  			schema: fakeSchema, shardState: shardState,
   281  		}, nil, logger, nil, nil, nil, nil, class, nil, nil)
   282  	require.Nil(t, err)
   283  
   284  	productsIds := []strfmt.UUID{
   285  		"1295c052-263d-4aae-99dd-920c5a370d06",
   286  		"1295c052-263d-4aae-99dd-920c5a370d07",
   287  	}
   288  
   289  	products := []map[string]interface{}{
   290  		{"name": "one"},
   291  		{"name": "two"},
   292  	}
   293  
   294  	err = index.addUUIDProperty(ctx)
   295  	require.Nil(t, err)
   296  
   297  	err = index.addProperty(ctx, &models.Property{
   298  		Name:         "name",
   299  		DataType:     schema.DataTypeText.PropString(),
   300  		Tokenization: models.PropertyTokenizationWhitespace,
   301  	})
   302  	require.Nil(t, err)
   303  
   304  	for i, p := range products {
   305  		product := models.Object{
   306  			Class:      class.Class,
   307  			ID:         productsIds[i],
   308  			Properties: p,
   309  		}
   310  
   311  		err := index.putObject(ctx, storobj.FromObject(
   312  			&product, []float32{0.1, 0.2, 0.01, 0.2}, nil), nil)
   313  		require.Nil(t, err)
   314  	}
   315  
   316  	// set all shards to readonly
   317  	index.ForEachShard(func(name string, shard ShardLike) error {
   318  		err = shard.UpdateStatus(storagestate.StatusReadOnly.String())
   319  		require.Nil(t, err)
   320  		return nil
   321  	})
   322  
   323  	err = index.drop()
   324  	require.Nil(t, err)
   325  }
   326  
   327  func emptyIdx(t *testing.T, rootDir string, class *models.Class) *Index {
   328  	logger, _ := test.NewNullLogger()
   329  	shardState := singleShardState()
   330  
   331  	idx, err := NewIndex(testCtx(), IndexConfig{
   332  		RootPath:              rootDir,
   333  		ClassName:             schema.ClassName(class.Class),
   334  		DisableLazyLoadShards: true,
   335  	}, shardState, inverted.ConfigFromModel(invertedConfig()),
   336  		hnsw.NewDefaultUserConfig(), nil, &fakeSchemaGetter{
   337  			shardState: shardState,
   338  		}, nil, logger, nil, nil, nil, nil, class, nil, nil)
   339  	require.Nil(t, err)
   340  	return idx
   341  }
   342  
   343  func invertedConfig() *models.InvertedIndexConfig {
   344  	return &models.InvertedIndexConfig{
   345  		CleanupIntervalSeconds: 60,
   346  		Stopwords: &models.StopwordConfig{
   347  			Preset: "none",
   348  		},
   349  		IndexNullState:      true,
   350  		IndexPropertyLength: true,
   351  	}
   352  }
   353  
   354  func getIndexFilenames(rootDir, indexName string) ([]string, error) {
   355  	var filenames []string
   356  	indexRoot, err := os.ReadDir(path.Join(rootDir, indexName))
   357  	if err != nil {
   358  		if os.IsNotExist(err) {
   359  			// index was dropped, or never existed
   360  			return filenames, nil
   361  		}
   362  		return nil, err
   363  	}
   364  	shardFiles, err := os.ReadDir(path.Join(rootDir, indexName, indexRoot[0].Name()))
   365  	if err != nil {
   366  		return filenames, err
   367  	}
   368  	for _, f := range shardFiles {
   369  		filenames = append(filenames, f.Name())
   370  	}
   371  	return filenames, nil
   372  }