github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/bm25f_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTest
    13  
    14  package db
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"testing"
    20  
    21  	"github.com/go-openapi/strfmt"
    22  	"github.com/google/uuid"
    23  	"github.com/sirupsen/logrus"
    24  	"github.com/stretchr/testify/assert"
    25  	"github.com/stretchr/testify/require"
    26  	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
    27  	"github.com/weaviate/weaviate/entities/additional"
    28  	"github.com/weaviate/weaviate/entities/filters"
    29  	"github.com/weaviate/weaviate/entities/models"
    30  	"github.com/weaviate/weaviate/entities/schema"
    31  	"github.com/weaviate/weaviate/entities/searchparams"
    32  	enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    33  )
    34  
    35  func BM25FinvertedConfig(k1, b float32, stopWordPreset string) *models.InvertedIndexConfig {
    36  	return &models.InvertedIndexConfig{
    37  		Bm25: &models.BM25Config{
    38  			K1: k1,
    39  			B:  b,
    40  		},
    41  		CleanupIntervalSeconds: 60,
    42  		Stopwords: &models.StopwordConfig{
    43  			Preset: stopWordPreset,
    44  		},
    45  		IndexNullState:      true,
    46  		IndexPropertyLength: true,
    47  	}
    48  }
    49  
    50  func SetupClass(t require.TestingT, repo *DB, schemaGetter *fakeSchemaGetter, logger logrus.FieldLogger, k1, b float32,
    51  ) {
    52  	vFalse := false
    53  	vTrue := true
    54  
    55  	class := &models.Class{
    56  		VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
    57  		InvertedIndexConfig: BM25FinvertedConfig(k1, b, "none"),
    58  		Class:               "MyClass",
    59  
    60  		Properties: []*models.Property{
    61  			{
    62  				Name:            "title",
    63  				DataType:        schema.DataTypeText.PropString(),
    64  				Tokenization:    models.PropertyTokenizationWord,
    65  				IndexFilterable: &vFalse,
    66  				IndexSearchable: &vTrue,
    67  			},
    68  			{
    69  				Name:            "description",
    70  				DataType:        schema.DataTypeText.PropString(),
    71  				Tokenization:    models.PropertyTokenizationWord,
    72  				IndexFilterable: &vFalse,
    73  				IndexSearchable: &vTrue,
    74  			},
    75  			{
    76  				Name:            "review",
    77  				DataType:        schema.DataTypeText.PropString(),
    78  				Tokenization:    models.PropertyTokenizationWord,
    79  				IndexFilterable: &vFalse,
    80  				IndexSearchable: &vTrue,
    81  			},
    82  			{
    83  				Name:            "textField",
    84  				DataType:        schema.DataTypeText.PropString(),
    85  				Tokenization:    models.PropertyTokenizationField,
    86  				IndexFilterable: &vFalse,
    87  				IndexSearchable: &vTrue,
    88  			},
    89  			{
    90  				Name:            "textWhitespace",
    91  				DataType:        schema.DataTypeText.PropString(),
    92  				Tokenization:    models.PropertyTokenizationWhitespace,
    93  				IndexFilterable: &vFalse,
    94  				IndexSearchable: &vTrue,
    95  			},
    96  			{
    97  				Name:            "relatedToGolf",
    98  				DataType:        schema.DataTypeBoolean.PropString(),
    99  				IndexFilterable: &vFalse,
   100  				IndexSearchable: &vTrue,
   101  			},
   102  			{
   103  				Name:            "multiTitles",
   104  				DataType:        schema.DataTypeTextArray.PropString(),
   105  				Tokenization:    models.PropertyTokenizationWord,
   106  				IndexFilterable: &vFalse,
   107  				IndexSearchable: &vTrue,
   108  			},
   109  			{
   110  				Name:            "multiTextWhitespace",
   111  				DataType:        schema.DataTypeTextArray.PropString(),
   112  				Tokenization:    models.PropertyTokenizationWhitespace,
   113  				IndexFilterable: &vFalse,
   114  				IndexSearchable: &vTrue,
   115  			},
   116  		},
   117  	}
   118  
   119  	schema := schema.Schema{
   120  		Objects: &models.Schema{
   121  			Classes: []*models.Class{class},
   122  		},
   123  	}
   124  
   125  	schemaGetter.schema = schema
   126  
   127  	migrator := NewMigrator(repo, logger)
   128  	migrator.AddClass(context.Background(), class, schemaGetter.shardState)
   129  
   130  	testData := []map[string]interface{}{}
   131  	testData = append(testData, map[string]interface{}{"title": "Our journey to BM25F", "description": "This is how we get to BM25F", "review": "none none none", "multiTitles": []string{"breakfast", "dinner"}})
   132  	testData = append(testData, map[string]interface{}{"title": "Why I dont like journey", "description": "This is about how we get somewhere", "multiTitles": []string{"going to a restaurant for dinner", "sandwiches and desert are a great lunch"}})
   133  	testData = append(testData, map[string]interface{}{"title": "My journeys in Journey", "description": "A journey story about journeying"})
   134  	testData = append(testData, map[string]interface{}{"title": "An unrelated title", "description": "Actually all about journey"})
   135  	testData = append(testData, map[string]interface{}{"title": "journey journey", "description": "journey journey journey"})
   136  	testData = append(testData, map[string]interface{}{"title": "journey", "description": "journey journey", "multiTextWhitespace": []string{"totally irrelevant:)", "we all MuuultiYell! together"}})
   137  	testData = append(testData, map[string]interface{}{"title": "JOURNEY", "description": "A LOUD JOURNEY", "multiTextWhitespace": []string{"MuuultiYell!", "is fun"}})
   138  	testData = append(testData, map[string]interface{}{"title": "An unrelated title", "description": "Absolutely nothing to do with the topic", "textField": "*&^$@#$%^&*()(Offtopic!!!!"})
   139  	testData = append(testData, map[string]interface{}{"title": "none", "description": "other", "textField": "YELLING IS FUN"})
   140  	testData = append(testData, map[string]interface{}{"title": "something", "description": "none none", "review": "none none none none none none"})
   141  
   142  	for i, data := range testData {
   143  		id := strfmt.UUID(uuid.MustParse(fmt.Sprintf("%032d", i)).String())
   144  
   145  		obj := &models.Object{Class: "MyClass", ID: id, Properties: data, CreationTimeUnix: 1565612833955, LastUpdateTimeUnix: 10000020}
   146  		vector := []float32{1, 3, 5, 0.4}
   147  		//{title: "Our journey to BM25F", description: " This is how we get to BM25F"}}
   148  		err := repo.PutObject(context.Background(), obj, vector, nil, nil)
   149  		require.Nil(t, err)
   150  	}
   151  }
   152  
   153  // DuplicatedFrom SetupClass to make sure this new test does not alter the results of the existing one
   154  func SetupClassForFilterScoringTest(t require.TestingT, repo *DB, schemaGetter *fakeSchemaGetter, logger logrus.FieldLogger, k1, b float32,
   155  ) {
   156  	vFalse := false
   157  	vTrue := true
   158  
   159  	class := &models.Class{
   160  		VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   161  		InvertedIndexConfig: BM25FinvertedConfig(k1, b, "none"),
   162  		Class:               "FilterClass",
   163  
   164  		Properties: []*models.Property{
   165  			{
   166  				Name:            "description",
   167  				DataType:        schema.DataTypeText.PropString(),
   168  				Tokenization:    models.PropertyTokenizationWord,
   169  				IndexFilterable: &vFalse,
   170  				IndexSearchable: &vTrue,
   171  			},
   172  			{
   173  				Name:            "relatedToGolf",
   174  				DataType:        schema.DataTypeBoolean.PropString(),
   175  				IndexFilterable: &vTrue,
   176  			},
   177  		},
   178  	}
   179  
   180  	schema := schema.Schema{
   181  		Objects: &models.Schema{
   182  			Classes: []*models.Class{class},
   183  		},
   184  	}
   185  
   186  	schemaGetter.schema = schema
   187  
   188  	migrator := NewMigrator(repo, logger)
   189  	migrator.AddClass(context.Background(), class, schemaGetter.shardState)
   190  
   191  	testData := []map[string]interface{}{}
   192  	testData = append(testData, map[string]interface{}{"description": "Brooks Koepka appeared a lot in the ms marco dataset. I was surprised to see golf content in there. I assume if the dataset was newer, we'd see a lot more Rory though.", "relatedToGolf": true})
   193  	testData = append(testData, map[string]interface{}{"description": "While one would expect Koepka to be a somewhat rare name, it did appear in msmarco also outside the context of Brooks.", "relatedToGolf": false})
   194  
   195  	for i, data := range testData {
   196  		id := strfmt.UUID(uuid.MustParse(fmt.Sprintf("%032d", i)).String())
   197  
   198  		obj := &models.Object{Class: "FilterClass", ID: id, Properties: data, CreationTimeUnix: 1565612833955, LastUpdateTimeUnix: 10000020}
   199  		vector := []float32{1, 3, 5, 0.4}
   200  		err := repo.PutObject(context.Background(), obj, vector, nil, nil)
   201  		require.Nil(t, err)
   202  	}
   203  }
   204  
   205  func TestBM25FJourney(t *testing.T) {
   206  	dirName := t.TempDir()
   207  
   208  	logger := logrus.New()
   209  	schemaGetter := &fakeSchemaGetter{
   210  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
   211  		shardState: singleShardState(),
   212  	}
   213  	repo, err := New(logger, Config{
   214  		MemtablesFlushDirtyAfter:  60,
   215  		RootPath:                  dirName,
   216  		QueryMaximumResults:       10000,
   217  		MaxImportGoroutinesFactor: 1,
   218  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, nil, nil)
   219  	require.Nil(t, err)
   220  	repo.SetSchemaGetter(schemaGetter)
   221  	require.Nil(t, repo.WaitForStartup(context.TODO()))
   222  	defer repo.Shutdown(context.Background())
   223  
   224  	SetupClass(t, repo, schemaGetter, logger, 1.2, 0.75)
   225  
   226  	idx := repo.GetIndex("MyClass")
   227  	require.NotNil(t, idx)
   228  
   229  	// Check basic search
   230  	addit := additional.Properties{}
   231  
   232  	t.Run("bm25f journey", func(t *testing.T) {
   233  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"title", "description", "textField"}, Query: "journey"}
   234  		res, scores, err := idx.objectSearch(context.TODO(), 1000, nil, kwr, nil, nil, addit, nil, "", 0)
   235  		require.Nil(t, err)
   236  
   237  		// Print results
   238  		t.Log("--- Start results for basic search ---")
   239  		for i, r := range res {
   240  			t.Logf("Result id: %v, score: %v, title: %v, description: %v, additional %+v\n", r.DocID, scores[i], r.Object.Properties.(map[string]interface{})["title"], r.Object.Properties.(map[string]interface{})["description"], r.Object.Additional)
   241  		}
   242  
   243  		// Check results in correct order
   244  		require.Equal(t, uint64(4), res[0].DocID)
   245  		require.Equal(t, uint64(5), res[1].DocID)
   246  		require.Equal(t, uint64(6), res[2].DocID)
   247  		require.Equal(t, uint64(3), res[3].DocID)
   248  		require.Equal(t, uint64(0), res[4].DocID)
   249  		require.Equal(t, uint64(2), res[5].DocID)
   250  
   251  		// Without additionalExplanations no explainScore entry should be present
   252  		require.NotContains(t, res[0].Object.Additional, "explainScore")
   253  	})
   254  
   255  	// Check non-alpha search on string field
   256  
   257  	// text/field are tokenized entirely, so we can search for non-alpha characters
   258  	t.Run("bm25f textField non-alpha", func(t *testing.T) {
   259  		kwrTextField := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"title", "description", "textField"}, Query: "*&^$@#$%^&*()(Offtopic!!!!"}
   260  		addit = additional.Properties{}
   261  		resTextField, scores, err := idx.objectSearch(context.TODO(), 1000, nil, kwrTextField, nil, nil, addit, nil, "", 0)
   262  		require.Nil(t, err)
   263  
   264  		// Print results
   265  		t.Log("--- Start results for textField search ---")
   266  		for i, r := range resTextField {
   267  			t.Logf("Result id: %v, score: %v, title: %v, description: %v, additional %+v\n", r.DocID, scores[i], r.Object.Properties.(map[string]interface{})["title"], r.Object.Properties.(map[string]interface{})["description"], r.Object.Additional)
   268  		}
   269  
   270  		// Check results in correct order
   271  		require.Equal(t, uint64(7), resTextField[0].DocID)
   272  	})
   273  
   274  	// text/field are not lower-cased before indexing, so upper case searches must be passed through unchanged.
   275  	t.Run("bm25f textField caps", func(t *testing.T) {
   276  		kwrTextField := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"textField"}, Query: "YELLING IS FUN"}
   277  		addit := additional.Properties{}
   278  		resTextField, scores, err := idx.objectSearch(context.TODO(), 1000, nil, kwrTextField, nil, nil, addit, nil, "", 0)
   279  		require.Nil(t, err)
   280  
   281  		// Print results
   282  		t.Log("--- Start results for textField caps search ---")
   283  		for i, r := range resTextField {
   284  			t.Logf("Result id: %v, score: %v, title: %v, description: %v, additional %+v\n", r.DocID, scores[i], r.Object.Properties.(map[string]interface{})["title"], r.Object.Properties.(map[string]interface{})["description"], r.Object.Additional)
   285  		}
   286  
   287  		// Check results in correct order
   288  		require.Equal(t, uint64(8), resTextField[0].DocID)
   289  	})
   290  
   291  	// Check basic text search WITH CAPS
   292  	t.Run("bm25f text with caps", func(t *testing.T) {
   293  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"title", "description"}, Query: "JOURNEY"}
   294  		res, scores, err := idx.objectSearch(context.TODO(), 1000, nil, kwr, nil, nil, addit, nil, "", 0)
   295  		// Print results
   296  		t.Log("--- Start results for search with caps ---")
   297  		for i, r := range res {
   298  			t.Logf("Result id: %v, score: %v, title: %v, description: %v, additional %+v\n", r.DocID, scores[i], r.Object.Properties.(map[string]interface{})["title"], r.Object.Properties.(map[string]interface{})["description"], r.Object.Additional)
   299  		}
   300  		require.Nil(t, err)
   301  
   302  		// Check results in correct order
   303  		require.Equal(t, uint64(4), res[0].DocID)
   304  		require.Equal(t, uint64(5), res[1].DocID)
   305  		require.Equal(t, uint64(6), res[2].DocID)
   306  		require.Equal(t, uint64(2), res[3].DocID)
   307  		require.Equal(t, uint64(3), res[4].DocID)
   308  		require.Equal(t, uint64(0), res[5].DocID)
   309  		require.Equal(t, uint64(1), res[6].DocID)
   310  	})
   311  
   312  	t.Run("bm25f journey boosted", func(t *testing.T) {
   313  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"title^3", "description"}, Query: "journey"}
   314  		res, scores, err := idx.objectSearch(context.TODO(), 1000, nil, kwr, nil, nil, addit, nil, "", 0)
   315  
   316  		require.Nil(t, err)
   317  		// Print results
   318  		t.Log("--- Start results for boosted search ---")
   319  		for i, r := range res {
   320  			t.Logf("Result id: %v, score: %v, title: %v, description: %v, additional %+v\n", r.DocID, scores[i], r.Object.Properties.(map[string]interface{})["title"], r.Object.Properties.(map[string]interface{})["description"], r.Object.Additional)
   321  		}
   322  
   323  		// Check results in correct order
   324  		require.Equal(t, uint64(4), res[0].DocID)
   325  		require.Equal(t, uint64(5), res[1].DocID)
   326  		require.Equal(t, uint64(6), res[2].DocID)
   327  		require.Equal(t, uint64(0), res[3].DocID)
   328  		require.Equal(t, uint64(1), res[4].DocID)
   329  		require.Equal(t, uint64(2), res[5].DocID)
   330  		require.Equal(t, uint64(3), res[6].DocID)
   331  	})
   332  
   333  	t.Run("Check search with two terms", func(t *testing.T) {
   334  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"title", "description"}, Query: "journey somewhere"}
   335  		res, _, err := idx.objectSearch(context.TODO(), 1000, nil, kwr, nil, nil, addit, nil, "", 0)
   336  		require.Nil(t, err)
   337  		// Check results in correct order
   338  		require.Equal(t, uint64(1), res[0].DocID)
   339  		require.Equal(t, uint64(4), res[1].DocID)
   340  		require.Equal(t, uint64(5), res[2].DocID)
   341  		require.Equal(t, uint64(6), res[3].DocID)
   342  		require.Equal(t, uint64(2), res[4].DocID)
   343  	})
   344  
   345  	t.Run("bm25f journey somewhere no properties", func(t *testing.T) {
   346  		// Check search with no properties (should include all properties)
   347  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{}, Query: "journey somewhere"}
   348  		res, _, err := idx.objectSearch(context.TODO(), 1000, nil, kwr, nil, nil, addit, nil, "", 0)
   349  		require.Nil(t, err)
   350  
   351  		// Check results in correct order
   352  		require.Equal(t, uint64(1), res[0].DocID)
   353  		require.Equal(t, uint64(4), res[1].DocID)
   354  		require.Equal(t, uint64(5), res[2].DocID)
   355  		require.Equal(t, uint64(6), res[3].DocID)
   356  	})
   357  
   358  	t.Run("bm25f non alphanums", func(t *testing.T) {
   359  		// Check search with no properties (should include all properties)
   360  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{}, Query: "*&^$@#$%^&*()(Offtopic!!!!"}
   361  		res, _, err := idx.objectSearch(context.TODO(), 1000, nil, kwr, nil, nil, addit, nil, "", 0)
   362  		require.Nil(t, err)
   363  		require.Equal(t, uint64(7), res[0].DocID)
   364  	})
   365  
   366  	t.Run("First result has high score", func(t *testing.T) {
   367  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"description"}, Query: "about BM25F"}
   368  		res, _, err := idx.objectSearch(context.TODO(), 5, nil, kwr, nil, nil, addit, nil, "", 0)
   369  		require.Nil(t, err)
   370  
   371  		require.Equal(t, uint64(0), res[0].DocID)
   372  		require.Len(t, res, 4) // four results have one of the terms
   373  	})
   374  
   375  	t.Run("More results than limit", func(t *testing.T) {
   376  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"description"}, Query: "journey"}
   377  		res, _, err := idx.objectSearch(context.TODO(), 5, nil, kwr, nil, nil, addit, nil, "", 0)
   378  		require.Nil(t, err)
   379  
   380  		require.Equal(t, uint64(4), res[0].DocID)
   381  		require.Equal(t, uint64(5), res[1].DocID)
   382  		require.Equal(t, uint64(6), res[2].DocID)
   383  		require.Equal(t, uint64(3), res[3].DocID)
   384  		require.Equal(t, uint64(2), res[4].DocID)
   385  		require.Len(t, res, 5) // four results have one of the terms
   386  	})
   387  
   388  	t.Run("Results from three properties", func(t *testing.T) {
   389  		kwr := &searchparams.KeywordRanking{Type: "bm25", Query: "none"}
   390  		res, _, err := idx.objectSearch(context.TODO(), 5, nil, kwr, nil, nil, addit, nil, "", 0)
   391  		require.Nil(t, err)
   392  
   393  		require.Equal(t, uint64(9), res[0].DocID)
   394  		require.Equal(t, uint64(0), res[1].DocID)
   395  		require.Equal(t, uint64(8), res[2].DocID)
   396  		require.Len(t, res, 3)
   397  	})
   398  
   399  	t.Run("Include additional explanations", func(t *testing.T) {
   400  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"description"}, Query: "journey", AdditionalExplanations: true}
   401  		res, _, err := idx.objectSearch(context.TODO(), 5, nil, kwr, nil, nil, addit, nil, "", 0)
   402  		require.Nil(t, err)
   403  
   404  		// With additionalExplanations explainScore entry should be present
   405  		require.Contains(t, res[0].Object.Additional, "explainScore")
   406  		require.Contains(t, res[0].Object.Additional["explainScore"], "BM25")
   407  	})
   408  
   409  	t.Run("Array fields text", func(t *testing.T) {
   410  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"multiTitles"}, Query: "dinner"}
   411  		res, _, err := idx.objectSearch(context.TODO(), 5, nil, kwr, nil, nil, addit, nil, "", 0)
   412  		require.Nil(t, err)
   413  
   414  		require.Len(t, res, 2)
   415  		require.Equal(t, uint64(0), res[0].DocID)
   416  		require.Equal(t, uint64(1), res[1].DocID)
   417  	})
   418  
   419  	t.Run("Array fields string", func(t *testing.T) {
   420  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"multiTextWhitespace"}, Query: "MuuultiYell!"}
   421  		res, _, err := idx.objectSearch(context.TODO(), 5, nil, kwr, nil, nil, addit, nil, "", 0)
   422  		require.Nil(t, err)
   423  
   424  		require.Len(t, res, 2)
   425  		require.Equal(t, uint64(6), res[0].DocID)
   426  		require.Equal(t, uint64(5), res[1].DocID)
   427  	})
   428  
   429  	t.Run("With autocut", func(t *testing.T) {
   430  		kwr := &searchparams.KeywordRanking{Type: "bm25", Query: "journey", Properties: []string{"description"}}
   431  		resNoAutoCut, noautocutscores, err := idx.objectSearch(context.TODO(), 10, nil, kwr, nil, nil, addit, nil, "", 0)
   432  		require.Nil(t, err)
   433  
   434  		resAutoCut, autocutscores, err := idx.objectSearch(context.TODO(), 10, nil, kwr, nil, nil, addit, nil, "", 1)
   435  		require.Nil(t, err)
   436  
   437  		require.Less(t, len(resAutoCut), len(resNoAutoCut))
   438  
   439  		require.EqualValues(t, float32(0.5868752), noautocutscores[0])
   440  		require.EqualValues(t, float32(0.5450892), noautocutscores[1]) // <= autocut last element
   441  		require.EqualValues(t, float32(0.34149727), noautocutscores[2])
   442  		require.EqualValues(t, float32(0.3049518), noautocutscores[3])
   443  		require.EqualValues(t, float32(0.27547202), noautocutscores[4])
   444  
   445  		require.Len(t, resAutoCut, 2)
   446  		require.EqualValues(t, float32(0.5868752), autocutscores[0])
   447  		require.EqualValues(t, float32(0.5450892), autocutscores[1])
   448  	})
   449  }
   450  
   451  func TestBM25FSingleProp(t *testing.T) {
   452  	dirName := t.TempDir()
   453  
   454  	logger := logrus.New()
   455  	schemaGetter := &fakeSchemaGetter{
   456  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
   457  		shardState: singleShardState(),
   458  	}
   459  	repo, err := New(logger, Config{
   460  		MemtablesFlushDirtyAfter:  60,
   461  		RootPath:                  dirName,
   462  		QueryMaximumResults:       10000,
   463  		MaxImportGoroutinesFactor: 1,
   464  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, nil, nil)
   465  	require.Nil(t, err)
   466  	repo.SetSchemaGetter(schemaGetter)
   467  	require.Nil(t, repo.WaitForStartup(context.TODO()))
   468  	defer repo.Shutdown(context.Background())
   469  
   470  	SetupClass(t, repo, schemaGetter, logger, 0.5, 100)
   471  
   472  	idx := repo.GetIndex("MyClass")
   473  	require.NotNil(t, idx)
   474  
   475  	// Check boosted
   476  	kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"description"}, Query: "journey"}
   477  	addit := additional.Properties{}
   478  	res, scores, err := idx.objectSearch(context.TODO(), 1000, nil, kwr, nil, nil, addit, nil, "", 0)
   479  	t.Log("--- Start results for singleprop search ---")
   480  	for i, r := range res {
   481  		t.Logf("Result id: %v, score: %v, title: %v, description: %v, additional %+v\n", r.DocID, scores[i], r.Object.Properties.(map[string]interface{})["title"], r.Object.Properties.(map[string]interface{})["description"], r.Object.Additional)
   482  	}
   483  	require.Nil(t, err)
   484  	// Check results in correct order
   485  	require.Equal(t, uint64(3), res[0].DocID)
   486  	require.Equal(t, uint64(4), res[3].DocID)
   487  
   488  	// Check scores
   489  	EqualFloats(t, float32(0.1248), scores[0], 5)
   490  	EqualFloats(t, float32(0.0363), scores[1], 5)
   491  }
   492  
   493  func TestBM25FWithFilters(t *testing.T) {
   494  	dirName := t.TempDir()
   495  
   496  	logger := logrus.New()
   497  	schemaGetter := &fakeSchemaGetter{
   498  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
   499  		shardState: singleShardState(),
   500  	}
   501  	repo, err := New(logger, Config{
   502  		MemtablesFlushDirtyAfter:  60,
   503  		RootPath:                  dirName,
   504  		QueryMaximumResults:       10000,
   505  		MaxImportGoroutinesFactor: 1,
   506  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, nil, nil)
   507  	require.Nil(t, err)
   508  	repo.SetSchemaGetter(schemaGetter)
   509  	require.Nil(t, repo.WaitForStartup(context.TODO()))
   510  	defer repo.Shutdown(context.Background())
   511  
   512  	SetupClass(t, repo, schemaGetter, logger, 0.5, 100)
   513  
   514  	idx := repo.GetIndex("MyClass")
   515  	require.NotNil(t, idx)
   516  
   517  	filter := &filters.LocalFilter{
   518  		Root: &filters.Clause{
   519  			Operator: filters.OperatorOr,
   520  			Operands: []filters.Clause{
   521  				{
   522  					Operator: filters.OperatorEqual,
   523  					On: &filters.Path{
   524  						Class:    schema.ClassName("MyClass"),
   525  						Property: schema.PropertyName("title"),
   526  					},
   527  					Value: &filters.Value{
   528  						Value: "My",
   529  						Type:  schema.DataType("text"),
   530  					},
   531  				},
   532  				{
   533  					Operator: filters.OperatorEqual,
   534  					On: &filters.Path{
   535  						Class:    schema.ClassName("MyClass"),
   536  						Property: schema.PropertyName("title"),
   537  					},
   538  					Value: &filters.Value{
   539  						Value: "journeys",
   540  						Type:  schema.DataType("text"),
   541  					},
   542  				},
   543  			},
   544  		},
   545  	}
   546  
   547  	kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"description"}, Query: "journey"}
   548  	addit := additional.Properties{}
   549  	res, _, err := idx.objectSearch(context.TODO(), 1000, filter, kwr, nil, nil, addit, nil, "", 0)
   550  
   551  	require.Nil(t, err)
   552  	require.True(t, len(res) == 1)
   553  	require.Equal(t, uint64(2), res[0].DocID)
   554  }
   555  
   556  func TestBM25FWithFilters_ScoreIsIdenticalWithOrWithoutFilter(t *testing.T) {
   557  	dirName := t.TempDir()
   558  
   559  	logger := logrus.New()
   560  	schemaGetter := &fakeSchemaGetter{
   561  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
   562  		shardState: singleShardState(),
   563  	}
   564  	repo, err := New(logger, Config{
   565  		MemtablesFlushDirtyAfter:  60,
   566  		RootPath:                  dirName,
   567  		QueryMaximumResults:       10000,
   568  		MaxImportGoroutinesFactor: 1,
   569  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, nil, nil)
   570  	require.Nil(t, err)
   571  	repo.SetSchemaGetter(schemaGetter)
   572  	require.Nil(t, repo.WaitForStartup(context.TODO()))
   573  	defer repo.Shutdown(context.Background())
   574  
   575  	SetupClassForFilterScoringTest(t, repo, schemaGetter, logger, 1.2, 0.75)
   576  
   577  	idx := repo.GetIndex("FilterClass")
   578  	require.NotNil(t, idx)
   579  
   580  	filter := &filters.LocalFilter{
   581  		Root: &filters.Clause{
   582  			On: &filters.Path{
   583  				Class:    schema.ClassName("FilterClass"),
   584  				Property: schema.PropertyName("relatedToGolf"),
   585  			},
   586  			Operator: filters.OperatorEqual,
   587  			Value: &filters.Value{
   588  				Value: true,
   589  				Type:  dtBool,
   590  			},
   591  		},
   592  	}
   593  
   594  	kwr := &searchparams.KeywordRanking{
   595  		Type:       "bm25",
   596  		Properties: []string{"description"},
   597  		Query:      "koepka golf",
   598  	}
   599  
   600  	addit := additional.Properties{}
   601  	filtered, filteredScores, err := idx.objectSearch(context.TODO(), 1000, filter, kwr, nil, nil, addit, nil, "", 0)
   602  	require.Nil(t, err)
   603  	unfiltered, unfilteredScores, err := idx.objectSearch(context.TODO(), 1000, nil, kwr, nil, nil, addit, nil, "", 0)
   604  	require.Nil(t, err)
   605  
   606  	require.Len(t, filtered, 1)   // should match exactly one element
   607  	require.Len(t, unfiltered, 2) // contains irrelevant result
   608  
   609  	assert.Equal(t, uint64(0), filtered[0].DocID)   // brooks koepka result
   610  	assert.Equal(t, uint64(0), unfiltered[0].DocID) // brooks koepka result
   611  
   612  	assert.Equal(t, filteredScores[0], unfilteredScores[0])
   613  }
   614  
   615  func TestBM25FDifferentParamsJourney(t *testing.T) {
   616  	dirName := t.TempDir()
   617  
   618  	logger := logrus.New()
   619  	schemaGetter := &fakeSchemaGetter{
   620  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
   621  		shardState: singleShardState(),
   622  	}
   623  	repo, err := New(logger, Config{
   624  		MemtablesFlushDirtyAfter:  60,
   625  		RootPath:                  dirName,
   626  		QueryMaximumResults:       10000,
   627  		MaxImportGoroutinesFactor: 1,
   628  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, nil, nil)
   629  	require.Nil(t, err)
   630  	repo.SetSchemaGetter(schemaGetter)
   631  	require.Nil(t, repo.WaitForStartup(context.TODO()))
   632  	defer repo.Shutdown(context.Background())
   633  
   634  	SetupClass(t, repo, schemaGetter, logger, 0.5, 100)
   635  
   636  	idx := repo.GetIndex("MyClass")
   637  	require.NotNil(t, idx)
   638  
   639  	// Check boosted
   640  	kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"title^2", "description"}, Query: "journey"}
   641  	addit := additional.Properties{}
   642  	res, scores, err := idx.objectSearch(context.TODO(), 1000, nil, kwr, nil, nil, addit, nil, "", 0)
   643  
   644  	// Print results
   645  	t.Log("--- Start results for boosted search ---")
   646  	for i, r := range res {
   647  		t.Logf("Result id: %v, score: %v, title: %v, description: %v, additional %+v\n", r.DocID, scores[i], r.Object.Properties.(map[string]interface{})["title"], r.Object.Properties.(map[string]interface{})["description"], r.Object.Additional)
   648  	}
   649  
   650  	require.Nil(t, err)
   651  
   652  	// Check results in correct order
   653  	require.Equal(t, uint64(6), res[0].DocID)
   654  	require.Equal(t, uint64(1), res[3].DocID)
   655  
   656  	// Print results
   657  	t.Log("--- Start results for boosted search ---")
   658  	for i, r := range res {
   659  		t.Logf("Result id: %v, score: %v, title: %v, description: %v, additional %+v\n", r.DocID, scores[i], r.Object.Properties.(map[string]interface{})["title"], r.Object.Properties.(map[string]interface{})["description"], r.Object.Additional)
   660  	}
   661  
   662  	// Check scores
   663  	EqualFloats(t, float32(0.06023), scores[0], 6)
   664  	EqualFloats(t, float32(0.04238), scores[1], 6)
   665  }
   666  
   667  func EqualFloats(t *testing.T, expected, actual float32, significantFigures int) {
   668  	s1 := fmt.Sprintf("%v", expected)
   669  	s2 := fmt.Sprintf("%v", actual)
   670  	if len(s1) < 2 || len(s2) < 2 {
   671  		t.Fail()
   672  	}
   673  	if len(s1) <= significantFigures {
   674  		significantFigures = len(s1) - 1
   675  	}
   676  	if len(s2) <= significantFigures {
   677  		significantFigures = len(s2) - 1
   678  	}
   679  	require.Equal(t, s1[:significantFigures+1], s2[:significantFigures+1])
   680  }
   681  
   682  // Compare with previous BM25 version to ensure the algorithm functions correctly
   683  func TestBM25FCompare(t *testing.T) {
   684  	dirName := t.TempDir()
   685  
   686  	logger := logrus.New()
   687  	schemaGetter := &fakeSchemaGetter{
   688  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
   689  		shardState: singleShardState(),
   690  	}
   691  	repo, err := New(logger, Config{
   692  		MemtablesFlushDirtyAfter:  60,
   693  		RootPath:                  dirName,
   694  		QueryMaximumResults:       10000,
   695  		MaxImportGoroutinesFactor: 1,
   696  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, nil, nil)
   697  	require.Nil(t, err)
   698  	repo.SetSchemaGetter(schemaGetter)
   699  	require.Nil(t, repo.WaitForStartup(context.TODO()))
   700  	defer repo.Shutdown(context.Background())
   701  
   702  	SetupClass(t, repo, schemaGetter, logger, 0.5, 100)
   703  
   704  	idx := repo.GetIndex("MyClass")
   705  	require.NotNil(t, idx)
   706  
   707  	shardNames := idx.getSchema.CopyShardingState(idx.Config.ClassName.String()).AllPhysicalShards()
   708  
   709  	for _, shardName := range shardNames {
   710  		shard := idx.shards.Load(shardName)
   711  		t.Logf("------ BM25F --------\n")
   712  		kwr := &searchparams.KeywordRanking{Type: "bm25", Properties: []string{"title"}, Query: "journey"}
   713  		addit := additional.Properties{}
   714  
   715  		withBM25Fobjs, withBM25Fscores, err := shard.ObjectSearch(context.TODO(), 1000, nil, kwr, nil, nil, addit)
   716  		require.Nil(t, err)
   717  
   718  		for i, r := range withBM25Fobjs {
   719  			t.Logf("Result id: %v, score: %v, title: %v, description: %v, additional %+v\n", r.DocID, withBM25Fscores[i], r.Object.Properties.(map[string]interface{})["title"], r.Object.Properties.(map[string]interface{})["description"], r.Object.Additional)
   720  		}
   721  
   722  		t.Logf("------ BM25 --------\n")
   723  		kwr.Type = ""
   724  
   725  		objs, scores, err := shard.ObjectSearch(context.TODO(), 1000, nil, kwr, nil, nil, addit)
   726  		require.Nil(t, err)
   727  
   728  		for i, r := range objs {
   729  			t.Logf("Result id: %v, score: %v, title: %v, description: %v, additional %+v\n", r.DocID, scores[i], r.Object.Properties.(map[string]interface{})["title"], r.Object.Properties.(map[string]interface{})["description"], r.Object.Additional)
   730  		}
   731  
   732  		require.Equal(t, len(withBM25Fobjs), len(objs))
   733  		for i := range objs {
   734  			t.Logf("%v: BM25F score: %v, BM25 score: %v", i, withBM25Fscores[i], scores[i])
   735  			EqualFloats(t, withBM25Fscores[i], scores[i], 9)
   736  		}
   737  
   738  		// Not all the scores are unique and the search is not stable, so pick ones that don't move
   739  		require.Equal(t, uint64(4), objs[0].DocID)
   740  		require.Equal(t, uint64(5), objs[1].DocID)
   741  		require.Equal(t, uint64(6), objs[2].DocID)
   742  		require.Equal(t, uint64(1), objs[3].DocID)
   743  		require.Equal(t, uint64(2), objs[4].DocID)
   744  		require.Equal(t, uint64(0), objs[5].DocID)
   745  
   746  		require.Equal(t, uint64(4), withBM25Fobjs[0].DocID)
   747  		require.Equal(t, uint64(5), withBM25Fobjs[1].DocID)
   748  		require.Equal(t, uint64(6), withBM25Fobjs[2].DocID)
   749  		require.Equal(t, uint64(1), withBM25Fobjs[3].DocID)
   750  		require.Equal(t, uint64(2), withBM25Fobjs[4].DocID)
   751  		require.Equal(t, uint64(0), withBM25Fobjs[5].DocID)
   752  
   753  	}
   754  }
   755  
   756  func Test_propertyHasSearchableIndex(t *testing.T) {
   757  	vFalse := false
   758  	vTrue := true
   759  
   760  	class := &models.Class{
   761  		VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   762  		InvertedIndexConfig: BM25FinvertedConfig(1, 1, "none"),
   763  		Class:               "MyClass",
   764  
   765  		Properties: []*models.Property{
   766  			{
   767  				Name:            "title",
   768  				DataType:        schema.DataTypeText.PropString(),
   769  				Tokenization:    models.PropertyTokenizationWord,
   770  				IndexFilterable: &vFalse,
   771  				IndexSearchable: nil,
   772  			},
   773  			{
   774  				Name:            "description",
   775  				DataType:        schema.DataTypeText.PropString(),
   776  				Tokenization:    models.PropertyTokenizationWord,
   777  				IndexFilterable: &vFalse,
   778  				IndexSearchable: &vTrue,
   779  			},
   780  			{
   781  				Name:            "textField",
   782  				DataType:        schema.DataTypeText.PropString(),
   783  				Tokenization:    models.PropertyTokenizationField,
   784  				IndexFilterable: &vFalse,
   785  				IndexSearchable: &vFalse,
   786  			},
   787  		},
   788  	}
   789  
   790  	ClassSchema := &models.Schema{
   791  		Classes: []*models.Class{class},
   792  	}
   793  	t.Run("Property index", func(t *testing.T) {
   794  		if got := inverted.PropertyHasSearchableIndex(ClassSchema, "MyClass", "description"); got != true {
   795  			t.Errorf("PropertyHasSearchableIndex() = %v, want %v", got, true)
   796  		}
   797  
   798  		if got := inverted.PropertyHasSearchableIndex(ClassSchema, "MyClass", "description^2"); got != true {
   799  			t.Errorf("PropertyHasSearchableIndex() = %v, want %v", got, true)
   800  		}
   801  
   802  		if got := inverted.PropertyHasSearchableIndex(ClassSchema, "MyClass", "textField"); got != false {
   803  			t.Errorf("PropertyHasSearchableIndex() = %v, want %v", got, false)
   804  		}
   805  
   806  		if got := inverted.PropertyHasSearchableIndex(ClassSchema, "MyClass", "title"); got != true {
   807  			t.Errorf("PropertyHasSearchableIndex() = %v, want %v", got, true)
   808  		}
   809  	})
   810  }
   811  
   812  func SetupClassDocuments(t require.TestingT, repo *DB, schemaGetter *fakeSchemaGetter, logger logrus.FieldLogger, k1, b float32, preset string,
   813  ) string {
   814  	vFalse := false
   815  	vTrue := true
   816  
   817  	className := "DocumentsPreset_" + preset
   818  	class := &models.Class{
   819  		VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   820  		InvertedIndexConfig: BM25FinvertedConfig(k1, b, preset),
   821  		Class:               className,
   822  
   823  		Properties: []*models.Property{
   824  			{
   825  				Name:            "document",
   826  				DataType:        schema.DataTypeText.PropString(),
   827  				Tokenization:    models.PropertyTokenizationWord,
   828  				IndexFilterable: &vFalse,
   829  				IndexSearchable: &vTrue,
   830  			},
   831  		},
   832  	}
   833  	schemaGetter.schema = schema.Schema{
   834  		Objects: &models.Schema{
   835  			Classes: []*models.Class{class},
   836  		},
   837  	}
   838  
   839  	migrator := NewMigrator(repo, logger)
   840  	migrator.AddClass(context.Background(), class, schemaGetter.shardState)
   841  
   842  	testData := []map[string]interface{}{}
   843  	testData = append(testData, map[string]interface{}{"document": "No matter what you do, the question of \"\"what is income\"\" is *always* going to be an extremely complex question.   To use this particular example, is paying a royalty fee to an external party a legitimate business expense that is part of the cost of doing business and which subtracts from your \"\"income\"\"?"})
   844  	testData = append(testData, map[string]interface{}{"document": "test"})
   845  	testData = append(testData, map[string]interface{}{"document": "As long as the losing business is not considered \"\"passive activity\"\" or \"\"hobby\"\", then yes. Passive Activity is an activity where you do not have to actively do anything to generate income. For example - royalties or rentals. Hobby is an activity that doesn't generate profit. Generally, if your business doesn't consistently generate profit (the IRS looks at 3 out of the last 5 years), it may be characterized as hobby. For hobby, loss deduction is limited by the hobby income and the 2% AGI threshold."})
   846  	testData = append(testData, map[string]interface{}{"document": "So you're basically saying that average market fluctuations have an affect on individual stocks, because individual stocks are often priced in relation to the growth of the market as a whole?  Also, what kinds of investments would be considered \"\"risk free\"\" in this nomenclature?"})
   847  
   848  	for i, data := range testData {
   849  		id := strfmt.UUID(uuid.MustParse(fmt.Sprintf("%032d", i)).String())
   850  
   851  		obj := &models.Object{Class: className, ID: id, Properties: data, CreationTimeUnix: 1565612833955, LastUpdateTimeUnix: 10000020}
   852  		vector := []float32{1, 3, 5, 0.4}
   853  		//{title: "Our journey to BM25F", description: " This is how we get to BM25F"}}
   854  		err := repo.PutObject(context.Background(), obj, vector, nil, nil)
   855  		require.Nil(t, err)
   856  	}
   857  	return className
   858  }
   859  
   860  func TestBM25F_ComplexDocuments(t *testing.T) {
   861  	dirName := t.TempDir()
   862  
   863  	logger := logrus.New()
   864  	schemaGetter := &fakeSchemaGetter{
   865  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
   866  		shardState: singleShardState(),
   867  	}
   868  	schemaGetter.schema = schema.Schema{
   869  		Objects: &models.Schema{
   870  			Classes: []*models.Class{},
   871  		},
   872  	}
   873  	repo, err := New(logger, Config{
   874  		MemtablesFlushDirtyAfter:  60,
   875  		RootPath:                  dirName,
   876  		QueryMaximumResults:       10000,
   877  		MaxImportGoroutinesFactor: 1,
   878  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, nil, nil)
   879  	require.Nil(t, err)
   880  	repo.SetSchemaGetter(schemaGetter)
   881  	require.Nil(t, repo.WaitForStartup(context.TODO()))
   882  	defer repo.Shutdown(context.Background())
   883  
   884  	classNone := SetupClassDocuments(t, repo, schemaGetter, logger, 0.5, 0.75, "none")
   885  	idxNone := repo.GetIndex(schema.ClassName(classNone))
   886  	require.NotNil(t, idxNone)
   887  
   888  	addit := additional.Properties{}
   889  
   890  	t.Run("single term", func(t *testing.T) {
   891  		kwr := &searchparams.KeywordRanking{Type: "bm25", Query: "considered a"}
   892  		res, scores, err := idxNone.objectSearch(context.TODO(), 10, nil, kwr, nil, nil, addit, nil, "", 0)
   893  		require.Nil(t, err)
   894  
   895  		// Print results
   896  		t.Log("--- Start results for boosted search ---")
   897  		for i, r := range res {
   898  			t.Logf("Result id: %v, score: %v, \n", r.DocID, scores[i])
   899  		}
   900  
   901  		// Check results in correct order
   902  		require.Equal(t, uint64(3), res[0].DocID)
   903  		require.Equal(t, uint64(0), res[1].DocID)
   904  		require.Equal(t, uint64(2), res[2].DocID)
   905  		require.Len(t, res, 3)
   906  
   907  		// Check scores
   908  		EqualFloats(t, float32(0.8914), scores[0], 5)
   909  		EqualFloats(t, float32(0.5425), scores[1], 5)
   910  		EqualFloats(t, float32(0.3952), scores[2], 5)
   911  	})
   912  
   913  	t.Run("Results without stopwords", func(t *testing.T) {
   914  		kwrNoStopwords := &searchparams.KeywordRanking{Type: "bm25", Query: "example losing business"}
   915  		resNoStopwords, resNoScores, err := idxNone.objectSearch(context.TODO(), 10, nil, kwrNoStopwords, nil, nil, addit, nil, "", 0)
   916  		require.Nil(t, err)
   917  
   918  		classEn := SetupClassDocuments(t, repo, schemaGetter, logger, 0.5, 0.75, "en")
   919  		idxEn := repo.GetIndex(schema.ClassName(classEn))
   920  		require.NotNil(t, idxEn)
   921  		kwrStopwords := &searchparams.KeywordRanking{Type: "bm25", Query: "an example on losing the business"}
   922  		resStopwords, resScores, err := idxEn.objectSearch(context.TODO(), 10, nil, kwrStopwords, nil, nil, addit, nil, "", 0)
   923  		require.Nil(t, err)
   924  
   925  		require.Equal(t, len(resNoStopwords), len(resStopwords))
   926  		for i, resNo := range resNoStopwords {
   927  			resYes := resStopwords[i]
   928  			require.Equal(t, resNo.DocID, resYes.DocID)
   929  			require.Equal(t, resNoScores[i], resScores[i])
   930  		}
   931  
   932  		kwrStopwordsDuplicate := &searchparams.KeywordRanking{Type: "bm25", Query: "on an example on losing the business on"}
   933  		resStopwordsDuplicate, duplicateScores, err := idxEn.objectSearch(context.TODO(), 10, nil, kwrStopwordsDuplicate, nil, nil, addit, nil, "", 0)
   934  		require.Nil(t, err)
   935  		require.Equal(t, len(resNoStopwords), len(resStopwordsDuplicate))
   936  		for i, resNo := range resNoStopwords {
   937  			resYes := resStopwordsDuplicate[i]
   938  			require.Equal(t, resNo.DocID, resYes.DocID)
   939  			require.Equal(t, resNoScores[i], duplicateScores[i])
   940  		}
   941  	})
   942  }
   943  
   944  func MultiPropClass(t require.TestingT, repo *DB, schemaGetter *fakeSchemaGetter, logger logrus.FieldLogger, k1, b float32) string {
   945  	vFalse := false
   946  	vTrue := true
   947  
   948  	className := "MultiProps"
   949  	class := &models.Class{
   950  		VectorIndexConfig:   enthnsw.NewDefaultUserConfig(),
   951  		InvertedIndexConfig: BM25FinvertedConfig(k1, b, "none"),
   952  		Class:               className,
   953  
   954  		Properties: []*models.Property{
   955  			{
   956  				Name:            "document",
   957  				DataType:        schema.DataTypeText.PropString(),
   958  				Tokenization:    models.PropertyTokenizationWord,
   959  				IndexFilterable: &vFalse,
   960  				IndexSearchable: &vTrue,
   961  			},
   962  			{
   963  				Name:            "title",
   964  				DataType:        schema.DataTypeText.PropString(),
   965  				Tokenization:    models.PropertyTokenizationWord,
   966  				IndexFilterable: &vFalse,
   967  				IndexSearchable: &vTrue,
   968  			},
   969  		},
   970  	}
   971  	schemaGetter.schema = schema.Schema{
   972  		Objects: &models.Schema{
   973  			Classes: []*models.Class{class},
   974  		},
   975  	}
   976  
   977  	migrator := NewMigrator(repo, logger)
   978  	migrator.AddClass(context.Background(), class, schemaGetter.shardState)
   979  
   980  	testData := []map[string]interface{}{}
   981  	testData = append(testData, map[string]interface{}{"document": "test", "title": "pepper"})
   982  	testData = append(testData, map[string]interface{}{"document": "banana", "title": "pepper"})
   983  	testData = append(testData, map[string]interface{}{"document": "apple", "title": "banana taste great"})
   984  	testData = append(testData, map[string]interface{}{"document": "banana burger", "title": "test"})
   985  	testData = append(testData, map[string]interface{}{"document": "carotte", "title": "great"})
   986  
   987  	for i, data := range testData {
   988  		id := strfmt.UUID(uuid.MustParse(fmt.Sprintf("%032d", i)).String())
   989  
   990  		obj := &models.Object{Class: className, ID: id, Properties: data, CreationTimeUnix: 1565612833955, LastUpdateTimeUnix: 10000020}
   991  		vector := []float32{1, 3, 5, 0.4}
   992  		err := repo.PutObject(context.Background(), obj, vector, nil, nil)
   993  		require.Nil(t, err)
   994  	}
   995  	return className
   996  }
   997  
   998  func TestBM25F_SortMultiProp(t *testing.T) {
   999  	t.Skip("Currently failing")
  1000  	dirName := t.TempDir()
  1001  
  1002  	logger := logrus.New()
  1003  	schemaGetter := &fakeSchemaGetter{
  1004  		schema:     schema.Schema{Objects: &models.Schema{Classes: nil}},
  1005  		shardState: singleShardState(),
  1006  	}
  1007  	schemaGetter.schema = schema.Schema{
  1008  		Objects: &models.Schema{
  1009  			Classes: []*models.Class{},
  1010  		},
  1011  	}
  1012  	repo, err := New(logger, Config{
  1013  		MemtablesFlushDirtyAfter:  60,
  1014  		RootPath:                  dirName,
  1015  		QueryMaximumResults:       10000,
  1016  		MaxImportGoroutinesFactor: 1,
  1017  	}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, nil, nil)
  1018  	require.Nil(t, err)
  1019  	repo.SetSchemaGetter(schemaGetter)
  1020  	require.Nil(t, repo.WaitForStartup(context.TODO()))
  1021  	defer repo.Shutdown(context.Background())
  1022  
  1023  	idx := repo.GetIndex(schema.ClassName(MultiPropClass(t, repo, schemaGetter, logger, 0.5, 0.75)))
  1024  	require.NotNil(t, idx)
  1025  
  1026  	addit := additional.Properties{}
  1027  
  1028  	t.Run("single term", func(t *testing.T) {
  1029  		kwr := &searchparams.KeywordRanking{Type: "bm25", Query: "pepper banana"}
  1030  		res, scores, err := idx.objectSearch(context.TODO(), 1, nil, kwr, nil, nil, addit, nil, "", 0)
  1031  		require.Nil(t, err)
  1032  
  1033  		// Print results
  1034  		t.Log("--- Start results for boosted search ---")
  1035  		for i, r := range res {
  1036  			t.Logf("Result id: %v, score: %v, \n", r.DocID, scores[i])
  1037  		}
  1038  
  1039  		// Document 1 is a result for both terms
  1040  		require.Len(t, res, 1)
  1041  		require.Equal(t, uint64(1), res[0].DocID)
  1042  	})
  1043  }