github.com/weaviate/weaviate@v1.24.6/modules/text2vec-gpt4all/vectorizer/objects_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "strings" 17 "testing" 18 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 "github.com/weaviate/weaviate/entities/models" 22 "github.com/weaviate/weaviate/entities/moduletools" 23 "github.com/weaviate/weaviate/entities/schema" 24 ) 25 26 func TestVectorizingObjects(t *testing.T) { 27 type testCase struct { 28 name string 29 input *models.Object 30 expectedClientCall string 31 noindex string 32 excludedProperty string // to simulate a schema where property names aren't vectorized 33 excludedClass string // to simulate a schema where class names aren't vectorized 34 } 35 36 propsSchema := []*models.Property{ 37 { 38 Name: "brand", 39 DataType: schema.DataTypeText.PropString(), 40 }, 41 { 42 Name: "power", 43 DataType: schema.DataTypeInt.PropString(), 44 }, 45 { 46 Name: "review", 47 DataType: schema.DataTypeText.PropString(), 48 }, 49 { 50 Name: "brandOfTheCar", 51 DataType: schema.DataTypeText.PropString(), 52 }, 53 { 54 Name: "reviews", 55 DataType: schema.DataTypeTextArray.PropString(), 56 }, 57 } 58 59 tests := []testCase{ 60 { 61 name: "empty object", 62 input: &models.Object{ 63 Class: "Car", 64 }, 65 expectedClientCall: "car", 66 }, 67 { 68 name: "object with one string prop", 69 input: &models.Object{ 70 Class: "Car", 71 Properties: map[string]interface{}{ 72 "brand": "Mercedes", 73 }, 74 }, 75 expectedClientCall: "car brand mercedes", 76 }, 77 { 78 name: "object with one non-string prop", 79 input: &models.Object{ 80 Class: "Car", 81 Properties: map[string]interface{}{ 82 "power": 300, 83 }, 84 }, 85 expectedClientCall: "car", 86 }, 87 { 88 name: "object with a mix of props", 89 input: &models.Object{ 90 Class: "Car", 91 Properties: map[string]interface{}{ 92 "brand": "best brand", 93 "power": 300, 94 "review": "a very great car", 95 }, 96 }, 97 expectedClientCall: "car brand best brand review a very great car", 98 }, 99 { 100 name: "with a noindexed property", 101 noindex: "review", 102 input: &models.Object{ 103 Class: "Car", 104 Properties: map[string]interface{}{ 105 "brand": "best brand", 106 "power": 300, 107 "review": "a very great car", 108 }, 109 }, 110 expectedClientCall: "car brand best brand", 111 }, 112 113 { 114 name: "with the class name not vectorized", 115 excludedClass: "Car", 116 input: &models.Object{ 117 Class: "Car", 118 Properties: map[string]interface{}{ 119 "brand": "best brand", 120 "power": 300, 121 "review": "a very great car", 122 }, 123 }, 124 expectedClientCall: "brand best brand review a very great car", 125 }, 126 { 127 name: "with a property name not vectorized", 128 excludedProperty: "review", 129 input: &models.Object{ 130 Class: "Car", 131 Properties: map[string]interface{}{ 132 "brand": "best brand", 133 "power": 300, 134 "review": "a very great car", 135 }, 136 }, 137 expectedClientCall: "car brand best brand a very great car", 138 }, 139 { 140 name: "with no schema labels vectorized", 141 excludedProperty: "review", 142 excludedClass: "Car", 143 input: &models.Object{ 144 Class: "Car", 145 Properties: map[string]interface{}{ 146 "review": "a very great car", 147 }, 148 }, 149 expectedClientCall: "a very great car", 150 }, 151 { 152 name: "with string/text arrays without propname or classname", 153 excludedProperty: "reviews", 154 excludedClass: "Car", 155 input: &models.Object{ 156 Class: "Car", 157 Properties: map[string]interface{}{ 158 "reviews": []string{ 159 "a very great car", 160 "you should consider buying one", 161 }, 162 }, 163 }, 164 expectedClientCall: "a very great car you should consider buying one", 165 }, 166 { 167 name: "with string/text arrays with propname and classname", 168 input: &models.Object{ 169 Class: "Car", 170 Properties: map[string]interface{}{ 171 "reviews": []string{ 172 "a very great car", 173 "you should consider buying one", 174 }, 175 }, 176 }, 177 expectedClientCall: "car reviews a very great car reviews you should consider buying one", 178 }, 179 { 180 name: "with compound class and prop names", 181 input: &models.Object{ 182 Class: "SuperCar", 183 Properties: map[string]interface{}{ 184 "brandOfTheCar": "best brand", 185 "power": 300, 186 "review": "a very great car", 187 }, 188 }, 189 expectedClientCall: "super car brand of the car best brand review a very great car", 190 }, 191 } 192 193 for _, test := range tests { 194 t.Run(test.name, func(t *testing.T) { 195 client := &fakeClient{} 196 197 v := New(client) 198 199 ic := &fakeClassConfig{ 200 excludedProperty: test.excludedProperty, 201 skippedProperty: test.noindex, 202 vectorizeClassName: test.excludedClass != "Car", 203 vectorizePropertyName: true, 204 } 205 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties) 206 vector, _, err := v.Object(context.Background(), test.input, comp, ic) 207 208 require.Nil(t, err) 209 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 210 expected := strings.Split(test.expectedClientCall, " ") 211 actual := strings.Split(client.lastInput, " ") 212 assert.Equal(t, expected, actual) 213 }) 214 } 215 } 216 217 func TestVectorizingObjectsWithDiff(t *testing.T) { 218 type testCase struct { 219 name string 220 input *models.Object 221 skipped string 222 comp moduletools.VectorizablePropsComparator 223 expectedVectorize bool 224 } 225 226 propsSchema := []*models.Property{ 227 { 228 Name: "brand", 229 DataType: schema.DataTypeText.PropString(), 230 }, 231 { 232 Name: "power", 233 DataType: schema.DataTypeInt.PropString(), 234 }, 235 { 236 Name: "description", 237 DataType: schema.DataTypeText.PropString(), 238 }, 239 { 240 Name: "reviews", 241 DataType: schema.DataTypeTextArray.PropString(), 242 }, 243 } 244 props := map[string]interface{}{ 245 "brand": "best brand", 246 "power": 300, 247 "description": "a very great car", 248 "reviews": []string{ 249 "a very great car", 250 "you should consider buying one", 251 }, 252 } 253 vector := []float32{0, 0, 0, 0} 254 var vectors models.Vectors 255 256 tests := []testCase{ 257 { 258 name: "noop comp", 259 input: &models.Object{ 260 Class: "Car", 261 Properties: props, 262 }, 263 comp: moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props), 264 expectedVectorize: true, 265 }, 266 { 267 name: "all props unchanged", 268 input: &models.Object{ 269 Class: "Car", 270 Properties: props, 271 }, 272 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors), 273 expectedVectorize: false, 274 }, 275 { 276 name: "one vectorizable prop changed (1)", 277 input: &models.Object{ 278 Class: "Car", 279 Properties: props, 280 }, 281 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 282 "brand": "old best brand", 283 "power": 300, 284 "description": "a very great car", 285 "reviews": []string{ 286 "a very great car", 287 "you should consider buying one", 288 }, 289 }, vector, vectors), 290 expectedVectorize: true, 291 }, 292 { 293 name: "one vectorizable prop changed (2)", 294 input: &models.Object{ 295 Class: "Car", 296 Properties: props, 297 }, 298 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 299 "brand": "best brand", 300 "power": 300, 301 "description": "old a very great car", 302 "reviews": []string{ 303 "a very great car", 304 "you should consider buying one", 305 }, 306 }, vector, vectors), 307 expectedVectorize: true, 308 }, 309 { 310 name: "one vectorizable prop changed (3)", 311 input: &models.Object{ 312 Class: "Car", 313 Properties: props, 314 }, 315 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 316 "brand": "best brand", 317 "power": 300, 318 "description": "a very great car", 319 "reviews": []string{ 320 "old a very great car", 321 "you should consider buying one", 322 }, 323 }, vector, vectors), 324 expectedVectorize: true, 325 }, 326 { 327 name: "all non-vectorizable props changed", 328 skipped: "description", 329 input: &models.Object{ 330 Class: "Car", 331 Properties: props, 332 }, 333 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 334 "brand": "best brand", 335 "power": 123, 336 "description": "old a very great car", 337 "reviews": []string{ 338 "a very great car", 339 "you should consider buying one", 340 }, 341 }, vector, vectors), 342 expectedVectorize: false, 343 }, 344 } 345 346 for _, test := range tests { 347 t.Run(test.name, func(t *testing.T) { 348 ic := &fakeClassConfig{ 349 skippedProperty: test.skipped, 350 } 351 352 client := &fakeClient{} 353 v := New(client) 354 355 vector, _, err := v.Object(context.Background(), test.input, test.comp, ic) 356 357 require.Nil(t, err) 358 if test.expectedVectorize { 359 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 360 assert.NotEmpty(t, client.lastInput) 361 } else { 362 assert.Equal(t, []float32{0, 0, 0, 0}, vector) 363 assert.Empty(t, client.lastInput) 364 } 365 }) 366 } 367 }