github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/config_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package inverted
    13  
    14  import (
    15  	"math"
    16  	"testing"
    17  
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/weaviate/weaviate/entities/models"
    20  	"github.com/weaviate/weaviate/entities/schema"
    21  	"github.com/weaviate/weaviate/usecases/config"
    22  )
    23  
    24  const float64EqualityThreshold = 1e-6
    25  
    26  func almostEqual(t *testing.T, a, b float64) bool {
    27  	closeEnough := math.Abs(a-b) <= float64EqualityThreshold
    28  	if !closeEnough {
    29  		t.Logf("%f and %f differ by more than a threshold of %f",
    30  			a, b, float64EqualityThreshold)
    31  	}
    32  	return closeEnough
    33  }
    34  
    35  func TestValidateConfig(t *testing.T) {
    36  	t.Run("with invalid BM25.k1", func(t *testing.T) {
    37  		in := &models.InvertedIndexConfig{
    38  			Bm25: &models.BM25Config{
    39  				K1: -1,
    40  				B:  0.7,
    41  			},
    42  		}
    43  
    44  		err := ValidateConfig(in)
    45  		assert.EqualError(t, err, "BM25.k1 must be >= 0")
    46  	})
    47  
    48  	t.Run("with invalid BM25.b", func(t *testing.T) {
    49  		in := &models.InvertedIndexConfig{
    50  			Bm25: &models.BM25Config{
    51  				K1: 1,
    52  				B:  1.001,
    53  			},
    54  		}
    55  
    56  		err := ValidateConfig(in)
    57  		assert.EqualError(t, err, "BM25.b must be <= 0 and <= 1")
    58  	})
    59  
    60  	t.Run("with valid config", func(t *testing.T) {
    61  		in := &models.InvertedIndexConfig{
    62  			Bm25: &models.BM25Config{
    63  				K1: 1,
    64  				B:  0.1,
    65  			},
    66  		}
    67  
    68  		err := ValidateConfig(in)
    69  		assert.Nil(t, err)
    70  	})
    71  
    72  	t.Run("with nonexistent stopword preset", func(t *testing.T) {
    73  		in := &models.InvertedIndexConfig{
    74  			Stopwords: &models.StopwordConfig{
    75  				Preset: "DNE",
    76  			},
    77  		}
    78  
    79  		err := ValidateConfig(in)
    80  		assert.EqualError(t, err, "stopwordPreset 'DNE' does not exist")
    81  	})
    82  
    83  	t.Run("with whitespace stopword additions", func(t *testing.T) {
    84  		additions := [][]string{
    85  			{"bats", " "},
    86  			{""},
    87  			{"something", "   ", "skippable"},
    88  		}
    89  
    90  		for _, addList := range additions {
    91  			in := &models.InvertedIndexConfig{
    92  				Stopwords: &models.StopwordConfig{
    93  					Additions: addList,
    94  				},
    95  			}
    96  
    97  			err := ValidateConfig(in)
    98  			assert.EqualError(t, err, "cannot use whitespace in stopword.additions")
    99  		}
   100  	})
   101  
   102  	t.Run("with whitespace stopword removals", func(t *testing.T) {
   103  		removals := [][]string{
   104  			{"bats", " "},
   105  			{""},
   106  			{"something", "   ", "skippable"},
   107  		}
   108  
   109  		for _, remList := range removals {
   110  			in := &models.InvertedIndexConfig{
   111  				Stopwords: &models.StopwordConfig{
   112  					Removals: remList,
   113  				},
   114  			}
   115  
   116  			err := ValidateConfig(in)
   117  			assert.EqualError(t, err, "cannot use whitespace in stopword.removals")
   118  		}
   119  	})
   120  
   121  	t.Run("with shared additions/removals items", func(t *testing.T) {
   122  		in := &models.InvertedIndexConfig{
   123  			Stopwords: &models.StopwordConfig{
   124  				Additions: []string{"some", "words", "are", "different"},
   125  				Removals:  []string{"and", "some", "the", "same"},
   126  			},
   127  		}
   128  
   129  		err := ValidateConfig(in)
   130  		assert.EqualError(t, err,
   131  			"found 'some' in both stopwords.additions and stopwords.removals")
   132  	})
   133  
   134  	t.Run("with additions that exist in preset", func(t *testing.T) {
   135  		tests := []struct {
   136  			additions      []string
   137  			expectedLength int
   138  		}{
   139  			{
   140  				additions:      []string{"superfluous", "extravagant", "a"},
   141  				expectedLength: 2,
   142  			},
   143  			{
   144  				additions:      []string{"a", "are", "the"},
   145  				expectedLength: 0,
   146  			},
   147  			{
   148  				additions:      []string{"everyone", "sleeps", "eventually"},
   149  				expectedLength: 3,
   150  			},
   151  		}
   152  
   153  		for _, test := range tests {
   154  			in := &models.InvertedIndexConfig{
   155  				Stopwords: &models.StopwordConfig{
   156  					Preset:    "en",
   157  					Additions: test.additions,
   158  				},
   159  			}
   160  
   161  			err := ValidateConfig(in)
   162  			assert.Nil(t, err)
   163  			assert.Equal(t, test.expectedLength, len(in.Stopwords.Additions))
   164  		}
   165  	})
   166  }
   167  
   168  func TestConfigFromModel(t *testing.T) {
   169  	t.Run("with all fields set", func(t *testing.T) {
   170  		k1 := 1.12
   171  		b := 0.7
   172  
   173  		in := &models.InvertedIndexConfig{
   174  			Bm25: &models.BM25Config{
   175  				K1: float32(k1),
   176  				B:  float32(b),
   177  			},
   178  			Stopwords: &models.StopwordConfig{
   179  				Preset: "en",
   180  			},
   181  		}
   182  
   183  		expected := schema.InvertedIndexConfig{
   184  			BM25: schema.BM25Config{
   185  				K1: k1,
   186  				B:  b,
   187  			},
   188  			Stopwords: models.StopwordConfig{
   189  				Preset: "en",
   190  			},
   191  		}
   192  
   193  		conf := ConfigFromModel(in)
   194  		assert.True(t, almostEqual(t, conf.BM25.K1, expected.BM25.K1))
   195  		assert.True(t, almostEqual(t, conf.BM25.B, expected.BM25.B))
   196  		assert.Equal(t, expected.Stopwords, conf.Stopwords)
   197  	})
   198  
   199  	t.Run("with no BM25 params set", func(t *testing.T) {
   200  		interval := int64(1)
   201  
   202  		in := &models.InvertedIndexConfig{
   203  			CleanupIntervalSeconds: interval,
   204  		}
   205  
   206  		expected := schema.InvertedIndexConfig{
   207  			BM25: schema.BM25Config{
   208  				K1: float64(config.DefaultBM25k1),
   209  				B:  float64(config.DefaultBM25b),
   210  			},
   211  		}
   212  
   213  		conf := ConfigFromModel(in)
   214  		assert.True(t, almostEqual(t, conf.BM25.K1, expected.BM25.K1))
   215  		assert.True(t, almostEqual(t, conf.BM25.B, expected.BM25.B))
   216  	})
   217  
   218  	t.Run("with no Stopword config set", func(t *testing.T) {
   219  		interval := int64(1)
   220  
   221  		in := &models.InvertedIndexConfig{
   222  			CleanupIntervalSeconds: interval,
   223  		}
   224  
   225  		expected := schema.InvertedIndexConfig{
   226  			Stopwords: models.StopwordConfig{
   227  				Preset: "en",
   228  			},
   229  		}
   230  
   231  		conf := ConfigFromModel(in)
   232  		assert.Equal(t, expected.Stopwords, conf.Stopwords)
   233  	})
   234  }