github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/config_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package inverted 13 14 import ( 15 "math" 16 "testing" 17 18 "github.com/stretchr/testify/assert" 19 "github.com/weaviate/weaviate/entities/models" 20 "github.com/weaviate/weaviate/entities/schema" 21 "github.com/weaviate/weaviate/usecases/config" 22 ) 23 24 const float64EqualityThreshold = 1e-6 25 26 func almostEqual(t *testing.T, a, b float64) bool { 27 closeEnough := math.Abs(a-b) <= float64EqualityThreshold 28 if !closeEnough { 29 t.Logf("%f and %f differ by more than a threshold of %f", 30 a, b, float64EqualityThreshold) 31 } 32 return closeEnough 33 } 34 35 func TestValidateConfig(t *testing.T) { 36 t.Run("with invalid BM25.k1", func(t *testing.T) { 37 in := &models.InvertedIndexConfig{ 38 Bm25: &models.BM25Config{ 39 K1: -1, 40 B: 0.7, 41 }, 42 } 43 44 err := ValidateConfig(in) 45 assert.EqualError(t, err, "BM25.k1 must be >= 0") 46 }) 47 48 t.Run("with invalid BM25.b", func(t *testing.T) { 49 in := &models.InvertedIndexConfig{ 50 Bm25: &models.BM25Config{ 51 K1: 1, 52 B: 1.001, 53 }, 54 } 55 56 err := ValidateConfig(in) 57 assert.EqualError(t, err, "BM25.b must be <= 0 and <= 1") 58 }) 59 60 t.Run("with valid config", func(t *testing.T) { 61 in := &models.InvertedIndexConfig{ 62 Bm25: &models.BM25Config{ 63 K1: 1, 64 B: 0.1, 65 }, 66 } 67 68 err := ValidateConfig(in) 69 assert.Nil(t, err) 70 }) 71 72 t.Run("with nonexistent stopword preset", func(t *testing.T) { 73 in := &models.InvertedIndexConfig{ 74 Stopwords: &models.StopwordConfig{ 75 Preset: "DNE", 76 }, 77 } 78 79 err := ValidateConfig(in) 80 assert.EqualError(t, err, "stopwordPreset 'DNE' does not exist") 81 }) 82 83 t.Run("with whitespace stopword additions", func(t *testing.T) { 84 additions := [][]string{ 85 {"bats", " "}, 86 {""}, 87 {"something", " ", "skippable"}, 88 } 89 90 for _, addList := range additions { 91 in := &models.InvertedIndexConfig{ 92 Stopwords: &models.StopwordConfig{ 93 Additions: addList, 94 }, 95 } 96 97 err := ValidateConfig(in) 98 assert.EqualError(t, err, "cannot use whitespace in stopword.additions") 99 } 100 }) 101 102 t.Run("with whitespace stopword removals", func(t *testing.T) { 103 removals := [][]string{ 104 {"bats", " "}, 105 {""}, 106 {"something", " ", "skippable"}, 107 } 108 109 for _, remList := range removals { 110 in := &models.InvertedIndexConfig{ 111 Stopwords: &models.StopwordConfig{ 112 Removals: remList, 113 }, 114 } 115 116 err := ValidateConfig(in) 117 assert.EqualError(t, err, "cannot use whitespace in stopword.removals") 118 } 119 }) 120 121 t.Run("with shared additions/removals items", func(t *testing.T) { 122 in := &models.InvertedIndexConfig{ 123 Stopwords: &models.StopwordConfig{ 124 Additions: []string{"some", "words", "are", "different"}, 125 Removals: []string{"and", "some", "the", "same"}, 126 }, 127 } 128 129 err := ValidateConfig(in) 130 assert.EqualError(t, err, 131 "found 'some' in both stopwords.additions and stopwords.removals") 132 }) 133 134 t.Run("with additions that exist in preset", func(t *testing.T) { 135 tests := []struct { 136 additions []string 137 expectedLength int 138 }{ 139 { 140 additions: []string{"superfluous", "extravagant", "a"}, 141 expectedLength: 2, 142 }, 143 { 144 additions: []string{"a", "are", "the"}, 145 expectedLength: 0, 146 }, 147 { 148 additions: []string{"everyone", "sleeps", "eventually"}, 149 expectedLength: 3, 150 }, 151 } 152 153 for _, test := range tests { 154 in := &models.InvertedIndexConfig{ 155 Stopwords: &models.StopwordConfig{ 156 Preset: "en", 157 Additions: test.additions, 158 }, 159 } 160 161 err := ValidateConfig(in) 162 assert.Nil(t, err) 163 assert.Equal(t, test.expectedLength, len(in.Stopwords.Additions)) 164 } 165 }) 166 } 167 168 func TestConfigFromModel(t *testing.T) { 169 t.Run("with all fields set", func(t *testing.T) { 170 k1 := 1.12 171 b := 0.7 172 173 in := &models.InvertedIndexConfig{ 174 Bm25: &models.BM25Config{ 175 K1: float32(k1), 176 B: float32(b), 177 }, 178 Stopwords: &models.StopwordConfig{ 179 Preset: "en", 180 }, 181 } 182 183 expected := schema.InvertedIndexConfig{ 184 BM25: schema.BM25Config{ 185 K1: k1, 186 B: b, 187 }, 188 Stopwords: models.StopwordConfig{ 189 Preset: "en", 190 }, 191 } 192 193 conf := ConfigFromModel(in) 194 assert.True(t, almostEqual(t, conf.BM25.K1, expected.BM25.K1)) 195 assert.True(t, almostEqual(t, conf.BM25.B, expected.BM25.B)) 196 assert.Equal(t, expected.Stopwords, conf.Stopwords) 197 }) 198 199 t.Run("with no BM25 params set", func(t *testing.T) { 200 interval := int64(1) 201 202 in := &models.InvertedIndexConfig{ 203 CleanupIntervalSeconds: interval, 204 } 205 206 expected := schema.InvertedIndexConfig{ 207 BM25: schema.BM25Config{ 208 K1: float64(config.DefaultBM25k1), 209 B: float64(config.DefaultBM25b), 210 }, 211 } 212 213 conf := ConfigFromModel(in) 214 assert.True(t, almostEqual(t, conf.BM25.K1, expected.BM25.K1)) 215 assert.True(t, almostEqual(t, conf.BM25.B, expected.BM25.B)) 216 }) 217 218 t.Run("with no Stopword config set", func(t *testing.T) { 219 interval := int64(1) 220 221 in := &models.InvertedIndexConfig{ 222 CleanupIntervalSeconds: interval, 223 } 224 225 expected := schema.InvertedIndexConfig{ 226 Stopwords: models.StopwordConfig{ 227 Preset: "en", 228 }, 229 } 230 231 conf := ConfigFromModel(in) 232 assert.Equal(t, expected.Stopwords, conf.Stopwords) 233 }) 234 }