github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/multi_shard_integration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTest
    13  // +build integrationTest
    14  
    15  package db
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math"
    21  	"math/rand"
    22  	"sort"
    23  	"testing"
    24  
    25  	"github.com/go-openapi/strfmt"
    26  	"github.com/google/uuid"
    27  	"github.com/sirupsen/logrus"
    28  	"github.com/sirupsen/logrus/hooks/test"
    29  	"github.com/stretchr/testify/assert"
    30  	"github.com/stretchr/testify/require"
    31  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer"
    32  	"github.com/weaviate/weaviate/entities/additional"
    33  	"github.com/weaviate/weaviate/entities/dto"
    34  	"github.com/weaviate/weaviate/entities/filters"
    35  	"github.com/weaviate/weaviate/entities/models"
    36  	"github.com/weaviate/weaviate/entities/schema"
    37  	"github.com/weaviate/weaviate/entities/schema/crossref"
    38  	"github.com/weaviate/weaviate/entities/search"
    39  	"github.com/weaviate/weaviate/entities/searchparams"
    40  	enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    41  	"github.com/weaviate/weaviate/entities/verbosity"
    42  	"github.com/weaviate/weaviate/usecases/objects"
    43  	"github.com/weaviate/weaviate/usecases/sharding"
    44  )
    45  
    46  func Test_MultiShardJourneys_IndividualImports(t *testing.T) {
    47  	r := getRandomSeed()
    48  	repo, logger := setupMultiShardTest(t)
    49  	defer func() {
    50  		repo.Shutdown(context.Background())
    51  	}()
    52  
    53  	t.Run("prepare", makeTestMultiShardSchema(repo, logger, false, testClassesForImporting()...))
    54  
    55  	data := multiShardTestData(r)
    56  	queryVec := exampleQueryVec(r)
    57  	groundTruth := bruteForceObjectsByQuery(data, queryVec)
    58  	refData := multiShardRefClassData(r, data)
    59  
    60  	t.Run("import all individually", func(t *testing.T) {
    61  		for _, obj := range data {
    62  			require.Nil(t, repo.PutObject(context.Background(), obj, obj.Vector, nil, nil))
    63  		}
    64  	})
    65  
    66  	t.Run("nodes api", testNodesAPI(repo))
    67  
    68  	t.Run("sorting objects", makeTestSortingClass(repo))
    69  
    70  	t.Run("verify objects", makeTestRetrievingBaseClass(repo, data, queryVec,
    71  		groundTruth))
    72  
    73  	t.Run("import refs individually", func(t *testing.T) {
    74  		for _, obj := range refData {
    75  			require.Nil(t, repo.PutObject(context.Background(), obj, obj.Vector, nil, nil))
    76  		}
    77  	})
    78  
    79  	t.Run("verify refs", makeTestRetrieveRefClass(repo, data, refData))
    80  
    81  	t.Run("batch delete", makeTestBatchDeleteAllObjects(repo))
    82  }
    83  
    84  func Test_MultiShardJourneys_BatchedImports(t *testing.T) {
    85  	r := getRandomSeed()
    86  	repo, logger := setupMultiShardTest(t)
    87  	defer func() {
    88  		repo.Shutdown(context.Background())
    89  	}()
    90  
    91  	t.Run("prepare", makeTestMultiShardSchema(repo, logger, false, testClassesForImporting()...))
    92  
    93  	data := multiShardTestData(r)
    94  	queryVec := exampleQueryVec(r)
    95  	groundTruth := bruteForceObjectsByQuery(data, queryVec)
    96  	refData := multiShardRefClassData(r, data)
    97  
    98  	t.Run("import in a batch", func(t *testing.T) {
    99  		batch := make(objects.BatchObjects, len(data))
   100  		for i, obj := range data {
   101  			batch[i] = objects.BatchObject{
   102  				OriginalIndex: i,
   103  				Object:        obj,
   104  				UUID:          obj.ID,
   105  			}
   106  		}
   107  
   108  		_, err := repo.BatchPutObjects(context.Background(), batch, nil)
   109  		require.Nil(t, err)
   110  	})
   111  
   112  	t.Run("nodes api", testNodesAPI(repo))
   113  
   114  	t.Run("verify objects", makeTestRetrievingBaseClass(repo, data, queryVec,
   115  		groundTruth))
   116  
   117  	t.Run("import refs in large batch", func(t *testing.T) {
   118  		// first strip the refs from the objects, so we can import them in a second
   119  		// step as batch ref
   120  
   121  		for _, obj := range refData {
   122  			withoutRef := &models.Object{
   123  				ID:         obj.ID,
   124  				Class:      obj.Class,
   125  				Vector:     obj.Vector,
   126  				Properties: map[string]interface{}{}, // empty so we remove the ref
   127  			}
   128  
   129  			require.Nil(t, repo.PutObject(context.Background(), withoutRef, withoutRef.Vector, nil, nil))
   130  		}
   131  
   132  		index := 0
   133  		refBatch := make(objects.BatchReferences, len(refData)*len(data))
   134  		for _, obj := range refData {
   135  			for _, ref := range obj.Properties.(map[string]interface{})["toOther"].(models.MultipleRef) {
   136  				to, _ := crossref.ParseSingleRef(ref)
   137  				refBatch[index] = objects.BatchReference{
   138  					OriginalIndex: index,
   139  					To:            to,
   140  					From:          crossref.NewSource(schema.ClassName(obj.Class), "toOther", obj.ID),
   141  				}
   142  				index++
   143  			}
   144  		}
   145  
   146  		_, err := repo.AddBatchReferences(context.Background(), refBatch, nil)
   147  		require.Nil(t, err)
   148  	})
   149  
   150  	t.Run("verify refs", makeTestRetrieveRefClass(repo, data, refData))
   151  
   152  	t.Run("batch delete", makeTestBatchDeleteAllObjects(repo))
   153  }
   154  
   155  func Test_MultiShardJourneys_BM25_Search(t *testing.T) {
   156  	repo, logger := setupMultiShardTest(t)
   157  	defer func() {
   158  		repo.Shutdown(context.Background())
   159  	}()
   160  
   161  	className := "RacecarPosts"
   162  
   163  	t.Run("prepare", func(t *testing.T) {
   164  		class := &models.Class{
   165  			Class:             className,
   166  			VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
   167  			InvertedIndexConfig: &models.InvertedIndexConfig{
   168  				CleanupIntervalSeconds: 60,
   169  			},
   170  			Properties: []*models.Property{
   171  				{
   172  					Name:         "contents",
   173  					DataType:     schema.DataTypeText.PropString(),
   174  					Tokenization: models.PropertyTokenizationWord,
   175  				},
   176  				{
   177  					Name:         "stringProp",
   178  					DataType:     schema.DataTypeText.PropString(),
   179  					Tokenization: models.PropertyTokenizationWhitespace,
   180  				},
   181  				{
   182  					Name:     "textArrayProp",
   183  					DataType: []string{string(schema.DataTypeTextArray)},
   184  				},
   185  			},
   186  		}
   187  
   188  		t.Run("prepare", makeTestMultiShardSchema(repo, logger, true, class))
   189  	})
   190  
   191  	t.Run("insert search data", func(t *testing.T) {
   192  		objs := objects.BatchObjects{
   193  			{
   194  				UUID: "c39751ed-ddc2-4c9f-a45b-8b5732ddde56",
   195  				Object: &models.Object{
   196  					ID:    "c39751ed-ddc2-4c9f-a45b-8b5732ddde56",
   197  					Class: className,
   198  					Properties: map[string]interface{}{
   199  						"contents": "Team Lotus was a domineering force in the early 90s",
   200  					},
   201  				},
   202  			},
   203  			{
   204  				UUID: "5d034311-06e1-476e-b446-1306db91d906",
   205  				Object: &models.Object{
   206  					ID:    "5d034311-06e1-476e-b446-1306db91d906",
   207  					Class: className,
   208  					Properties: map[string]interface{}{
   209  						"contents": "When a car becomes unserviceable, the driver must retire early from the race",
   210  					},
   211  				},
   212  			},
   213  			{
   214  				UUID: "01989a8c-e37f-471d-89ca-9a787dbbf5f2",
   215  				Object: &models.Object{
   216  					ID:    "01989a8c-e37f-471d-89ca-9a787dbbf5f2",
   217  					Class: className,
   218  					Properties: map[string]interface{}{
   219  						"contents": "A young driver is better than an old driver",
   220  					},
   221  				},
   222  			},
   223  			{
   224  				UUID: "392614c5-4ca4-4630-a014-61fe868a20fd",
   225  				Object: &models.Object{
   226  					ID:    "392614c5-4ca4-4630-a014-61fe868a20fd",
   227  					Class: className,
   228  					Properties: map[string]interface{}{
   229  						"contents": "an old driver doesn't retire early",
   230  					},
   231  				},
   232  			},
   233  		}
   234  
   235  		_, err := repo.BatchPutObjects(context.Background(), objs, nil)
   236  		require.Nil(t, err)
   237  	})
   238  
   239  	t.Run("ranked keyword search", func(t *testing.T) {
   240  		type testcase struct {
   241  			expectedResults []string
   242  			rankingParams   *searchparams.KeywordRanking
   243  		}
   244  
   245  		tests := []testcase{
   246  			{
   247  				rankingParams: &searchparams.KeywordRanking{
   248  					Query:      "driver",
   249  					Properties: []string{"contents"},
   250  				},
   251  				expectedResults: []string{
   252  					"01989a8c-e37f-471d-89ca-9a787dbbf5f2",
   253  					"392614c5-4ca4-4630-a014-61fe868a20fd",
   254  					"5d034311-06e1-476e-b446-1306db91d906",
   255  				},
   256  			},
   257  		}
   258  
   259  		for _, test := range tests {
   260  			res, err := repo.Search(context.Background(), dto.GetParams{
   261  				ClassName:      className,
   262  				Pagination:     &filters.Pagination{Limit: 10},
   263  				KeywordRanking: test.rankingParams,
   264  			})
   265  			require.Nil(t, err)
   266  			require.Equal(t, len(test.expectedResults), len(res))
   267  			for i := range res {
   268  				assert.Equal(t, test.expectedResults[i], res[i].ID.String())
   269  			}
   270  			t.Logf("res: %+v", res)
   271  		}
   272  	})
   273  }
   274  
   275  func setupMultiShardTest(t *testing.T) (*DB, *logrus.Logger) {
   276  	dirName := t.TempDir()
   277  
   278  	logger, _ := test.NewNullLogger()
   279  	repo, err := New(logger, Config{
   280  		ServerVersion:             "server-version",
   281  		GitHash:                   "git-hash",
   282  		MemtablesFlushDirtyAfter:  60,
   283  		RootPath:                  dirName,
   284  		QueryMaximumResults:       10000,
   285  		MaxImportGoroutinesFactor: 1,
   286  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil)
   287  	require.Nil(t, err)
   288  	return repo, logger
   289  }
   290  
   291  func makeTestMultiShardSchema(repo *DB, logger logrus.FieldLogger, fixedShardState bool, classes ...*models.Class) func(t *testing.T) {
   292  	return func(t *testing.T) {
   293  		var shardState *sharding.State
   294  		if fixedShardState {
   295  			shardState = fixedMultiShardState()
   296  		} else {
   297  			shardState = multiShardState()
   298  		}
   299  		schemaGetter := &fakeSchemaGetter{
   300  			schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
   301  			shardState: shardState,
   302  		}
   303  		repo.SetSchemaGetter(schemaGetter)
   304  		err := repo.WaitForStartup(testCtx())
   305  		require.Nil(t, err)
   306  		migrator := NewMigrator(repo, logger)
   307  
   308  		t.Run("creating the class", func(t *testing.T) {
   309  			for _, class := range classes {
   310  				require.Nil(t, migrator.AddClass(context.Background(), class, schemaGetter.shardState))
   311  			}
   312  		})
   313  
   314  		// update schema getter so it's in sync with class
   315  		schemaGetter.schema = schema.Schema{
   316  			Objects: &models.Schema{
   317  				Classes: classes,
   318  			},
   319  		}
   320  	}
   321  }
   322  
   323  func makeTestRetrievingBaseClass(repo *DB, data []*models.Object,
   324  	queryVec []float32, groundTruth []*models.Object,
   325  ) func(t *testing.T) {
   326  	return func(t *testing.T) {
   327  		t.Run("retrieve all individually", func(t *testing.T) {
   328  			for _, desired := range data {
   329  				res, err := repo.ObjectByID(context.Background(), desired.ID, search.SelectProperties{}, additional.Properties{}, "")
   330  				assert.Nil(t, err)
   331  
   332  				require.NotNil(t, res)
   333  				assert.Equal(t, desired.Properties.(map[string]interface{})["boolProp"].(bool),
   334  					res.Object().Properties.(map[string]interface{})["boolProp"].(bool))
   335  				assert.Equal(t, desired.ID, res.Object().ID)
   336  			}
   337  		})
   338  
   339  		t.Run("retrieve through filter (object search)", func(t *testing.T) {
   340  			do := func(limit, expected int) {
   341  				filters := &filters.LocalFilter{
   342  					Root: &filters.Clause{
   343  						Operator: filters.OperatorEqual,
   344  						Value: &filters.Value{
   345  							Value: true,
   346  							Type:  schema.DataTypeBoolean,
   347  						},
   348  						On: &filters.Path{
   349  							Property: "boolProp",
   350  						},
   351  					},
   352  				}
   353  				res, err := repo.ObjectSearch(context.Background(), 0, limit, filters, nil,
   354  					additional.Properties{}, "")
   355  				assert.Nil(t, err)
   356  
   357  				assert.Len(t, res, expected)
   358  				for _, obj := range res {
   359  					assert.Equal(t, true, obj.Schema.(map[string]interface{})["boolProp"].(bool))
   360  				}
   361  			}
   362  
   363  			t.Run("with high limit", func(t *testing.T) {
   364  				do(100, 10)
   365  			})
   366  
   367  			t.Run("with low limit", func(t *testing.T) {
   368  				do(3, 3)
   369  			})
   370  		})
   371  
   372  		t.Run("retrieve through filter (class search)", func(t *testing.T) {
   373  			do := func(limit, expected int) {
   374  				filter := &filters.LocalFilter{
   375  					Root: &filters.Clause{
   376  						Operator: filters.OperatorEqual,
   377  						Value: &filters.Value{
   378  							Value: true,
   379  							Type:  schema.DataTypeBoolean,
   380  						},
   381  						On: &filters.Path{
   382  							Property: "boolProp",
   383  						},
   384  					},
   385  				}
   386  				res, err := repo.Search(context.Background(), dto.GetParams{
   387  					Filters: filter,
   388  					Pagination: &filters.Pagination{
   389  						Limit: limit,
   390  					},
   391  					ClassName: "TestClass",
   392  				})
   393  				assert.Nil(t, err)
   394  
   395  				assert.Len(t, res, expected)
   396  				for _, obj := range res {
   397  					assert.Equal(t, true, obj.Schema.(map[string]interface{})["boolProp"].(bool))
   398  				}
   399  			}
   400  
   401  			t.Run("with high limit", func(t *testing.T) {
   402  				do(100, 10)
   403  			})
   404  
   405  			t.Run("with low limit", func(t *testing.T) {
   406  				do(3, 3)
   407  			})
   408  		})
   409  
   410  		t.Run("retrieve through class-level vector search", func(t *testing.T) {
   411  			do := func(t *testing.T, limit, expected int) {
   412  				res, err := repo.VectorSearch(context.Background(), dto.GetParams{
   413  					SearchVector: queryVec,
   414  					Pagination: &filters.Pagination{
   415  						Limit: limit,
   416  					},
   417  					ClassName: "TestClass",
   418  				})
   419  				assert.Nil(t, err)
   420  				assert.Len(t, res, expected)
   421  				for i, obj := range res {
   422  					assert.Equal(t, groundTruth[i].ID, obj.ID)
   423  				}
   424  			}
   425  
   426  			t.Run("with high limit", func(t *testing.T) {
   427  				do(t, 100, 20)
   428  			})
   429  
   430  			t.Run("with low limit", func(t *testing.T) {
   431  				do(t, 3, 3)
   432  			})
   433  		})
   434  
   435  		t.Run("retrieve through inter-class vector search", func(t *testing.T) {
   436  			do := func(t *testing.T, limit, expected int) {
   437  				res, err := repo.CrossClassVectorSearch(context.Background(), queryVec, "", 0, limit, nil)
   438  				assert.Nil(t, err)
   439  				assert.Len(t, res, expected)
   440  				for i, obj := range res {
   441  					assert.Equal(t, groundTruth[i].ID, obj.ID)
   442  				}
   443  			}
   444  
   445  			t.Run("with high limit", func(t *testing.T) {
   446  				do(t, 100, 20)
   447  			})
   448  
   449  			t.Run("with low limit", func(t *testing.T) {
   450  				do(t, 3, 3)
   451  			})
   452  		})
   453  	}
   454  }
   455  
   456  func makeTestRetrieveRefClass(repo *DB, data, refData []*models.Object) func(t *testing.T) {
   457  	return func(t *testing.T) {
   458  		t.Run("retrieve ref data individually with select props", func(t *testing.T) {
   459  			for _, desired := range refData {
   460  				res, err := repo.ObjectByID(context.Background(), desired.ID, search.SelectProperties{
   461  					search.SelectProperty{
   462  						IsPrimitive: false,
   463  						Name:        "toOther",
   464  						Refs: []search.SelectClass{{
   465  							ClassName: "TestClass",
   466  							RefProperties: search.SelectProperties{{
   467  								Name:        "index",
   468  								IsPrimitive: true,
   469  							}},
   470  						}},
   471  					},
   472  				}, additional.Properties{}, "")
   473  				assert.Nil(t, err)
   474  				refs := res.Schema.(map[string]interface{})["toOther"].([]interface{})
   475  				assert.Len(t, refs, len(data))
   476  				for i, ref := range refs {
   477  					indexField := ref.(search.LocalRef).Fields["index"].(float64)
   478  					assert.Equal(t, i, int(indexField))
   479  				}
   480  			}
   481  		})
   482  	}
   483  }
   484  
   485  func makeTestSortingClass(repo *DB) func(t *testing.T) {
   486  	return func(t *testing.T) {
   487  		t.Run("sort by property", func(t *testing.T) {
   488  			getIndex := func(res search.Result) float64 {
   489  				if prop := res.Object().Properties.(map[string]interface{})["index"]; prop != nil {
   490  					return prop.(float64)
   491  				}
   492  				return -1
   493  			}
   494  			getBoolProp := func(res search.Result) bool {
   495  				if prop := res.Object().Properties.(map[string]interface{})["boolProp"]; prop != nil {
   496  					return prop.(bool)
   497  				}
   498  				return false
   499  			}
   500  			getStringProp := func(res search.Result) string {
   501  				if prop := res.Object().Properties.(map[string]interface{})["stringProp"]; prop != nil {
   502  					return prop.(string)
   503  				}
   504  				return ""
   505  			}
   506  			getTextArrayProp := func(res search.Result) []string {
   507  				if prop := res.Object().Properties.(map[string]interface{})["textArrayProp"]; prop != nil {
   508  					return prop.([]string)
   509  				}
   510  				return nil
   511  			}
   512  			type test struct {
   513  				name                   string
   514  				sort                   []filters.Sort
   515  				expectedIndexes        []float64
   516  				expectedBoolProps      []bool
   517  				expectedStringProps    []string
   518  				expectedTextArrayProps [][]string
   519  				constainsErrorMsgs     []string
   520  			}
   521  			tests := []test{
   522  				{
   523  					name:            "indexProp desc",
   524  					sort:            []filters.Sort{{Path: []string{"indexProp"}, Order: "desc"}},
   525  					expectedIndexes: []float64{19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
   526  				},
   527  				{
   528  					name:            "indexProp asc",
   529  					sort:            []filters.Sort{{Path: []string{"indexProp"}, Order: "asc"}},
   530  					expectedIndexes: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
   531  				},
   532  				{
   533  					name:                "stringProp desc",
   534  					sort:                []filters.Sort{{Path: []string{"stringProp"}, Order: "desc"}},
   535  					expectedStringProps: []string{"s19", "s18", "s17", "s16", "s15", "s14", "s13", "s12", "s11", "s10", "s09", "s08", "s07", "s06", "s05", "s04", "s03", "s02", "s01", "s00"},
   536  				},
   537  				{
   538  					name:                "stringProp asc",
   539  					sort:                []filters.Sort{{Path: []string{"stringProp"}, Order: "asc"}},
   540  					expectedStringProps: []string{"s00", "s01", "s02", "s03", "s04", "s05", "s06", "s07", "s08", "s09", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19"},
   541  				},
   542  				{
   543  					name:                   "textArrayProp desc",
   544  					sort:                   []filters.Sort{{Path: []string{"textArrayProp"}, Order: "desc"}},
   545  					expectedTextArrayProps: [][]string{{"s19", "19"}, {"s18", "18"}, {"s17", "17"}, {"s16", "16"}, {"s15", "15"}, {"s14", "14"}, {"s13", "13"}, {"s12", "12"}, {"s11", "11"}, {"s10", "10"}, {"s09", "09"}, {"s08", "08"}, {"s07", "07"}, {"s06", "06"}, {"s05", "05"}, {"s04", "04"}, {"s03", "03"}, {"s02", "02"}, {"s01", "01"}, {"s00", "00"}},
   546  				},
   547  				{
   548  					name:                   "textArrayProp asc",
   549  					sort:                   []filters.Sort{{Path: []string{"textArrayProp"}, Order: "asc"}},
   550  					expectedTextArrayProps: [][]string{{"s00", "00"}, {"s01", "01"}, {"s02", "02"}, {"s03", "03"}, {"s04", "04"}, {"s05", "05"}, {"s06", "06"}, {"s07", "07"}, {"s08", "08"}, {"s09", "09"}, {"s10", "10"}, {"s11", "11"}, {"s12", "12"}, {"s13", "13"}, {"s14", "14"}, {"s15", "15"}, {"s16", "16"}, {"s17", "17"}, {"s18", "18"}, {"s19", "19"}},
   551  				},
   552  				{
   553  					name:              "boolProp desc",
   554  					sort:              []filters.Sort{{Path: []string{"boolProp"}, Order: "desc"}},
   555  					expectedBoolProps: []bool{true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false},
   556  				},
   557  				{
   558  					name:              "boolProp asc",
   559  					sort:              []filters.Sort{{Path: []string{"boolProp"}, Order: "asc"}},
   560  					expectedBoolProps: []bool{false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true},
   561  				},
   562  				{
   563  					name:                "boolProp asc stringProp asc",
   564  					sort:                []filters.Sort{{Path: []string{"boolProp"}, Order: "asc"}, {Path: []string{"stringProp"}, Order: "asc"}},
   565  					expectedBoolProps:   []bool{false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true},
   566  					expectedStringProps: []string{"s01", "s03", "s05", "s07", "s09", "s11", "s13", "s15", "s17", "s19", "s00", "s02", "s04", "s06", "s08", "s10", "s12", "s14", "s16", "s18"},
   567  				},
   568  				{
   569  					name:                "boolProp desc stringProp asc",
   570  					sort:                []filters.Sort{{Path: []string{"boolProp"}, Order: "desc"}, {Path: []string{"stringProp"}, Order: "asc"}},
   571  					expectedBoolProps:   []bool{true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false},
   572  					expectedStringProps: []string{"s00", "s02", "s04", "s06", "s08", "s10", "s12", "s14", "s16", "s18", "s01", "s03", "s05", "s07", "s09", "s11", "s13", "s15", "s17", "s19"},
   573  				},
   574  				{
   575  					name:              "boolProp asc indexProp asc",
   576  					sort:              []filters.Sort{{Path: []string{"boolProp"}, Order: "asc"}, {Path: []string{"indexProp"}, Order: "asc"}},
   577  					expectedBoolProps: []bool{false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true},
   578  					expectedIndexes:   []float64{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18},
   579  				},
   580  				{
   581  					name:              "boolProp asc indexProp desc",
   582  					sort:              []filters.Sort{{Path: []string{"boolProp"}, Order: "asc"}, {Path: []string{"indexProp"}, Order: "desc"}},
   583  					expectedBoolProps: []bool{false, false, false, false, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true},
   584  					expectedIndexes:   []float64{19, 17, 15, 13, 11, 9, 7, 5, 3, 1, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0},
   585  				},
   586  				{
   587  					name:            "index property doesn't exist in testrefclass",
   588  					sort:            []filters.Sort{{Path: []string{"index"}, Order: "desc"}},
   589  					expectedIndexes: nil,
   590  					constainsErrorMsgs: []string{
   591  						"no such prop with name 'index' found in class 'TestRefClass' in the schema. " +
   592  							"Check your schema files for which properties in this class are available",
   593  					},
   594  				},
   595  				{
   596  					name:            "non existent property in all classes",
   597  					sort:            []filters.Sort{{Path: []string{"nonexistentproperty"}, Order: "desc"}},
   598  					expectedIndexes: nil,
   599  					constainsErrorMsgs: []string{
   600  						"no such prop with name 'nonexistentproperty' found in class 'TestClass' in the schema. " +
   601  							"Check your schema files for which properties in this class are available",
   602  						"no such prop with name 'nonexistentproperty' found in class 'TestRefClass' in the schema. " +
   603  							"Check your schema files for which properties in this class are available",
   604  					},
   605  				},
   606  			}
   607  			for _, test := range tests {
   608  				t.Run(test.name, func(t *testing.T) {
   609  					res, err := repo.ObjectSearch(context.Background(), 0, 1000, nil, test.sort,
   610  						additional.Properties{}, "")
   611  					if len(test.constainsErrorMsgs) > 0 {
   612  						require.NotNil(t, err)
   613  						for _, errorMsg := range test.constainsErrorMsgs {
   614  							assert.Contains(t, err.Error(), errorMsg)
   615  						}
   616  					} else {
   617  						require.Nil(t, err)
   618  						if len(test.expectedIndexes) > 0 {
   619  							for i := range res {
   620  								assert.Equal(t, test.expectedIndexes[i], getIndex(res[i]))
   621  							}
   622  						}
   623  						if len(test.expectedBoolProps) > 0 {
   624  							for i := range res {
   625  								assert.Equal(t, test.expectedBoolProps[i], getBoolProp(res[i]))
   626  							}
   627  						}
   628  						if len(test.expectedStringProps) > 0 {
   629  							for i := range res {
   630  								assert.Equal(t, test.expectedStringProps[i], getStringProp(res[i]))
   631  							}
   632  						}
   633  						if len(test.expectedTextArrayProps) > 0 {
   634  							for i := range res {
   635  								assert.EqualValues(t, test.expectedTextArrayProps[i], getTextArrayProp(res[i]))
   636  							}
   637  						}
   638  					}
   639  				})
   640  			}
   641  		})
   642  	}
   643  }
   644  
   645  func testNodesAPI(repo *DB) func(t *testing.T) {
   646  	return func(t *testing.T) {
   647  		nodeStatues, err := repo.GetNodeStatus(context.Background(), "", verbosity.OutputVerbose)
   648  		require.Nil(t, err)
   649  		require.NotNil(t, nodeStatues)
   650  
   651  		require.Len(t, nodeStatues, 1)
   652  		nodeStatus := nodeStatues[0]
   653  		assert.NotNil(t, nodeStatus)
   654  		assert.Equal(t, "node1", nodeStatus.Name)
   655  		assert.Equal(t, "server-version", nodeStatus.Version)
   656  		assert.Equal(t, "git-hash", nodeStatus.GitHash)
   657  		assert.Len(t, nodeStatus.Shards, 6)
   658  		var testClassShardsCount, testClassObjectsCount int64
   659  		var testRefClassShardsCount, testRefClassObjectsCount int64
   660  		for _, status := range nodeStatus.Shards {
   661  			if status.Class == "TestClass" {
   662  				testClassShardsCount += 1
   663  				testClassObjectsCount += status.ObjectCount
   664  			}
   665  			if status.Class == "TestRefClass" {
   666  				testRefClassShardsCount += 1
   667  				testRefClassObjectsCount += status.ObjectCount
   668  			}
   669  		}
   670  		assert.Equal(t, int64(3), testClassShardsCount)
   671  		// a previous version of this test made assertions on object counts,
   672  		// however with object count becoming async, we can no longer make exact
   673  		// assertions here. See https://github.com/weaviate/weaviate/issues/4193
   674  		// for details.
   675  		assert.Equal(t, int64(3), testRefClassShardsCount)
   676  		assert.Equal(t, int64(6), nodeStatus.Stats.ShardCount)
   677  	}
   678  }
   679  
   680  func makeTestBatchDeleteAllObjects(repo *DB) func(t *testing.T) {
   681  	return func(t *testing.T) {
   682  		performDelete := func(t *testing.T, className string) {
   683  			getParams := func(className string, dryRun bool) objects.BatchDeleteParams {
   684  				return objects.BatchDeleteParams{
   685  					ClassName: schema.ClassName(className),
   686  					Filters: &filters.LocalFilter{
   687  						Root: &filters.Clause{
   688  							Operator: filters.OperatorLike,
   689  							Value: &filters.Value{
   690  								Value: "*",
   691  								Type:  schema.DataTypeText,
   692  							},
   693  							On: &filters.Path{
   694  								Property: "id",
   695  							},
   696  						},
   697  					},
   698  					DryRun: dryRun,
   699  					Output: "verbose",
   700  				}
   701  			}
   702  			performClassSearch := func(className string) ([]search.Result, error) {
   703  				return repo.Search(context.Background(), dto.GetParams{
   704  					ClassName:  className,
   705  					Pagination: &filters.Pagination{Limit: 10000},
   706  				})
   707  			}
   708  			// get the initial count of the objects
   709  			res, err := performClassSearch(className)
   710  			require.Nil(t, err)
   711  			beforeDelete := len(res)
   712  			require.True(t, beforeDelete > 0)
   713  			// dryRun == true
   714  			batchDeleteRes, err := repo.BatchDeleteObjects(context.Background(), getParams(className, true), nil, "")
   715  			require.Nil(t, err)
   716  			require.Equal(t, int64(beforeDelete), batchDeleteRes.Matches)
   717  			require.Equal(t, beforeDelete, len(batchDeleteRes.Objects))
   718  			for _, batchRes := range batchDeleteRes.Objects {
   719  				require.Nil(t, batchRes.Err)
   720  			}
   721  			// check that every object is preserved (not deleted)
   722  			res, err = performClassSearch(className)
   723  			require.Nil(t, err)
   724  			require.Equal(t, beforeDelete, len(res))
   725  			// dryRun == false, perform actual delete
   726  			batchDeleteRes, err = repo.BatchDeleteObjects(context.Background(), getParams(className, false), nil, "")
   727  			require.Nil(t, err)
   728  			require.Equal(t, int64(beforeDelete), batchDeleteRes.Matches)
   729  			require.Equal(t, beforeDelete, len(batchDeleteRes.Objects))
   730  			for _, batchRes := range batchDeleteRes.Objects {
   731  				require.Nil(t, batchRes.Err)
   732  			}
   733  			// check that every object is deleted
   734  			res, err = performClassSearch(className)
   735  			require.Nil(t, err)
   736  			require.Equal(t, 0, len(res))
   737  		}
   738  		t.Run("batch delete TestRefClass", func(t *testing.T) {
   739  			performDelete(t, "TestRefClass")
   740  		})
   741  		t.Run("batch delete TestClass", func(t *testing.T) {
   742  			performDelete(t, "TestClass")
   743  		})
   744  	}
   745  }
   746  
   747  func exampleQueryVec(r *rand.Rand) []float32 {
   748  	dim := 10
   749  	vec := make([]float32, dim)
   750  	for j := range vec {
   751  		vec[j] = r.Float32()
   752  	}
   753  	return vec
   754  }
   755  
   756  func multiShardTestData(r *rand.Rand) []*models.Object {
   757  	size := 20
   758  	dim := 10
   759  	out := make([]*models.Object, size)
   760  	for i := range out {
   761  		vec := make([]float32, dim)
   762  		for j := range vec {
   763  			vec[j] = r.Float32()
   764  		}
   765  
   766  		out[i] = &models.Object{
   767  			ID:     strfmt.UUID(uuid.New().String()),
   768  			Class:  "TestClass",
   769  			Vector: vec,
   770  			Properties: map[string]interface{}{
   771  				"boolProp":      i%2 == 0,
   772  				"index":         i,
   773  				"indexProp":     i,
   774  				"stringProp":    fmt.Sprintf("s%02d", i),
   775  				"textArrayProp": []string{fmt.Sprintf("s%02d", i), fmt.Sprintf("%02d", i)},
   776  			},
   777  		}
   778  	}
   779  
   780  	return out
   781  }
   782  
   783  func multiShardRefClassData(r *rand.Rand, targets []*models.Object) []*models.Object {
   784  	// each class will link to all possible targets, so that we can be sure that
   785  	// we hit cross-shard links
   786  	targetLinks := make(models.MultipleRef, len(targets))
   787  	for i, obj := range targets {
   788  		targetLinks[i] = &models.SingleRef{
   789  			Beacon: strfmt.URI(crossref.NewLocalhost("", obj.ID).String()),
   790  		}
   791  	}
   792  
   793  	size := 20
   794  	dim := 10
   795  	out := make([]*models.Object, size)
   796  	for i := range out {
   797  		vec := make([]float32, dim)
   798  		for j := range vec {
   799  			vec[j] = r.Float32()
   800  		}
   801  
   802  		out[i] = &models.Object{
   803  			ID:     strfmt.UUID(uuid.New().String()),
   804  			Class:  "TestRefClass",
   805  			Vector: vec,
   806  			Properties: map[string]interface{}{
   807  				"toOther": targetLinks,
   808  			},
   809  		}
   810  	}
   811  
   812  	return out
   813  }
   814  
   815  func bruteForceObjectsByQuery(objs []*models.Object,
   816  	query []float32,
   817  ) []*models.Object {
   818  	type distanceAndObj struct {
   819  		distance float32
   820  		obj      *models.Object
   821  	}
   822  
   823  	distProv := distancer.NewDotProductProvider()
   824  	distances := make([]distanceAndObj, len(objs))
   825  
   826  	for i := range objs {
   827  		dist, _, _ := distProv.SingleDist(normalize(query), normalize(objs[i].Vector))
   828  		distances[i] = distanceAndObj{
   829  			distance: dist,
   830  			obj:      objs[i],
   831  		}
   832  	}
   833  
   834  	sort.Slice(distances, func(a, b int) bool {
   835  		return distances[a].distance < distances[b].distance
   836  	})
   837  
   838  	out := make([]*models.Object, len(objs))
   839  	for i := range out {
   840  		out[i] = distances[i].obj
   841  	}
   842  
   843  	return out
   844  }
   845  
   846  func testClassesForImporting() []*models.Class {
   847  	return []*models.Class{
   848  		{
   849  			VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   850  			InvertedIndexConfig: invertedConfig(),
   851  			Class:               "TestClass",
   852  			Properties: []*models.Property{
   853  				{
   854  					Name:     "boolProp",
   855  					DataType: []string{string(schema.DataTypeBoolean)},
   856  				},
   857  				{
   858  					Name:     "index",
   859  					DataType: []string{string(schema.DataTypeInt)},
   860  				},
   861  				{
   862  					Name:     "indexProp",
   863  					DataType: []string{string(schema.DataTypeInt)},
   864  				},
   865  				{
   866  					Name:         "stringProp",
   867  					DataType:     schema.DataTypeText.PropString(),
   868  					Tokenization: models.PropertyTokenizationWhitespace,
   869  				},
   870  				{
   871  					Name:     "textArrayProp",
   872  					DataType: []string{string(schema.DataTypeTextArray)},
   873  				},
   874  			},
   875  		},
   876  		{
   877  			VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   878  			InvertedIndexConfig: invertedConfig(),
   879  			Class:               "TestRefClass",
   880  			Properties: []*models.Property{
   881  				{
   882  					Name:     "boolProp",
   883  					DataType: []string{string(schema.DataTypeBoolean)},
   884  				},
   885  				{
   886  					Name:     "toOther",
   887  					DataType: []string{"TestClass"},
   888  				},
   889  				{
   890  					Name:     "indexProp",
   891  					DataType: []string{string(schema.DataTypeInt)},
   892  				},
   893  				{
   894  					Name:         "stringProp",
   895  					DataType:     schema.DataTypeText.PropString(),
   896  					Tokenization: models.PropertyTokenizationWhitespace,
   897  				},
   898  				{
   899  					Name:     "textArrayProp",
   900  					DataType: []string{string(schema.DataTypeTextArray)},
   901  				},
   902  			},
   903  		},
   904  	}
   905  }
   906  
   907  func normalize(v []float32) []float32 {
   908  	var norm float32
   909  	for i := range v {
   910  		norm += v[i] * v[i]
   911  	}
   912  
   913  	norm = float32(math.Sqrt(float64(norm)))
   914  	for i := range v {
   915  		v[i] = v[i] / norm
   916  	}
   917  
   918  	return v
   919  }