github.com/weaviate/weaviate@v1.24.6/modules/text2vec-contextionary/vectorizer/vectorizer_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "strings" 17 "testing" 18 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 "github.com/weaviate/weaviate/entities/models" 22 "github.com/weaviate/weaviate/entities/moduletools" 23 "github.com/weaviate/weaviate/entities/schema" 24 ) 25 26 func TestVectorizingObjects(t *testing.T) { 27 type testCase struct { 28 name string 29 input *models.Object 30 expectedClientCall []string 31 noindex string 32 excludedProperty string // to simulate a schema where property names aren't vectorized 33 excludedClass string // to simulate a schema where class names aren't vectorized 34 } 35 36 propsSchema := []*models.Property{ 37 { 38 Name: "brand", 39 DataType: schema.DataTypeText.PropString(), 40 }, 41 { 42 Name: "power", 43 DataType: schema.DataTypeInt.PropString(), 44 }, 45 { 46 Name: "review", 47 DataType: schema.DataTypeText.PropString(), 48 }, 49 { 50 Name: "brandOfTheCar", 51 DataType: schema.DataTypeText.PropString(), 52 }, 53 { 54 Name: "reviews", 55 DataType: schema.DataTypeTextArray.PropString(), 56 }, 57 } 58 59 tests := []testCase{ 60 { 61 name: "empty object", 62 input: &models.Object{ 63 Class: "Car", 64 }, 65 expectedClientCall: []string{"car"}, 66 }, 67 { 68 name: "object with one string prop", 69 input: &models.Object{ 70 Class: "Car", 71 Properties: map[string]interface{}{ 72 "brand": "Mercedes", 73 }, 74 }, 75 expectedClientCall: []string{"car brand mercedes"}, 76 }, 77 78 { 79 name: "object with one non-string prop", 80 input: &models.Object{ 81 Class: "Car", 82 Properties: map[string]interface{}{ 83 "power": 300, 84 }, 85 }, 86 expectedClientCall: []string{"car"}, 87 }, 88 89 { 90 name: "object with a mix of props", 91 input: &models.Object{ 92 Class: "Car", 93 Properties: map[string]interface{}{ 94 "brand": "best brand", 95 "power": 300, 96 "review": "a very great car", 97 }, 98 }, 99 expectedClientCall: []string{"car brand best brand review a very great car"}, 100 }, 101 { 102 name: "with a noindexed property", 103 noindex: "review", 104 input: &models.Object{ 105 Class: "Car", 106 Properties: map[string]interface{}{ 107 "brand": "best brand", 108 "power": 300, 109 "review": "a very great car", 110 }, 111 }, 112 expectedClientCall: []string{"car brand best brand"}, 113 }, 114 115 { 116 name: "with the class name not vectorized", 117 excludedClass: "Car", 118 input: &models.Object{ 119 Class: "Car", 120 Properties: map[string]interface{}{ 121 "brand": "best brand", 122 "power": 300, 123 "review": "a very great car", 124 }, 125 }, 126 expectedClientCall: []string{"brand best brand review a very great car"}, 127 }, 128 129 { 130 name: "with a property name not vectorized", 131 excludedProperty: "review", 132 input: &models.Object{ 133 Class: "Car", 134 Properties: map[string]interface{}{ 135 "brand": "best brand", 136 "power": 300, 137 "review": "a very great car", 138 }, 139 }, 140 expectedClientCall: []string{"car brand best brand a very great car"}, 141 }, 142 143 { 144 name: "with no schema labels vectorized", 145 excludedProperty: "review", 146 excludedClass: "Car", 147 input: &models.Object{ 148 Class: "Car", 149 Properties: map[string]interface{}{ 150 "review": "a very great car", 151 }, 152 }, 153 expectedClientCall: []string{"a very great car"}, 154 }, 155 156 { 157 name: "with string/text arrays without propname or classname", 158 excludedProperty: "reviews", 159 excludedClass: "Car", 160 input: &models.Object{ 161 Class: "Car", 162 Properties: map[string]interface{}{ 163 "reviews": []string{ 164 "a very great car", 165 "you should consider buying one", 166 }, 167 }, 168 }, 169 expectedClientCall: []string{"a very great car you should consider buying one"}, 170 }, 171 172 { 173 name: "with string/text arrays with propname and classname", 174 input: &models.Object{ 175 Class: "Car", 176 Properties: map[string]interface{}{ 177 "reviews": []string{ 178 "a very great car", 179 "you should consider buying one", 180 }, 181 }, 182 }, 183 expectedClientCall: []string{"car reviews a very great car reviews you should consider buying one"}, 184 }, 185 186 { 187 name: "with compound class and prop names", 188 input: &models.Object{ 189 Class: "SuperCar", 190 Properties: map[string]interface{}{ 191 "brandOfTheCar": "best brand", 192 "power": 300, 193 "review": "a very great car", 194 }, 195 }, 196 expectedClientCall: []string{"super car brand of the car best brand review a very great car"}, 197 }, 198 } 199 200 for _, test := range tests { 201 t.Run(test.name, func(t *testing.T) { 202 ic := &fakeClassConfig{ 203 excludedProperty: test.excludedProperty, 204 skippedProperty: test.noindex, 205 vectorizeClassName: test.excludedClass != "Car", 206 vectorizePropertyName: true, 207 } 208 209 client := &fakeClient{} 210 v := New(client) 211 212 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties) 213 vector, _, err := v.Object(context.Background(), test.input, comp, ic) 214 215 require.Nil(t, err) 216 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 217 expected := strings.Split(test.expectedClientCall[0], " ") 218 actual := strings.Split(client.lastInput[0], " ") 219 assert.ElementsMatch(t, expected, actual) 220 }) 221 } 222 } 223 224 func TestVectorizingObjectsWithDiff(t *testing.T) { 225 type testCase struct { 226 name string 227 input *models.Object 228 skipped string 229 comp moduletools.VectorizablePropsComparator 230 expectedVectorize bool 231 } 232 233 propsSchema := []*models.Property{ 234 { 235 Name: "brand", 236 DataType: schema.DataTypeText.PropString(), 237 }, 238 { 239 Name: "power", 240 DataType: schema.DataTypeInt.PropString(), 241 }, 242 { 243 Name: "description", 244 DataType: schema.DataTypeText.PropString(), 245 }, 246 { 247 Name: "reviews", 248 DataType: schema.DataTypeTextArray.PropString(), 249 }, 250 } 251 props := map[string]interface{}{ 252 "brand": "best brand", 253 "power": 300, 254 "description": "a very great car", 255 "reviews": []string{ 256 "a very great car", 257 "you should consider buying one", 258 }, 259 } 260 vector := []float32{0, 0, 0, 0} 261 var vectors models.Vectors 262 263 tests := []testCase{ 264 { 265 name: "noop comp", 266 input: &models.Object{ 267 Class: "Car", 268 Properties: props, 269 }, 270 comp: moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props), 271 expectedVectorize: true, 272 }, 273 { 274 name: "all props unchanged", 275 input: &models.Object{ 276 Class: "Car", 277 Properties: props, 278 }, 279 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors), 280 expectedVectorize: false, 281 }, 282 { 283 name: "one vectorizable prop changed (1)", 284 input: &models.Object{ 285 Class: "Car", 286 Properties: props, 287 }, 288 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 289 "brand": "old best brand", 290 "power": 300, 291 "description": "a very great car", 292 "reviews": []string{ 293 "a very great car", 294 "you should consider buying one", 295 }, 296 }, vector, vectors), 297 expectedVectorize: true, 298 }, 299 { 300 name: "one vectorizable prop changed (2)", 301 input: &models.Object{ 302 Class: "Car", 303 Properties: props, 304 }, 305 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 306 "brand": "best brand", 307 "power": 300, 308 "description": "old a very great car", 309 "reviews": []string{ 310 "a very great car", 311 "you should consider buying one", 312 }, 313 }, vector, vectors), 314 expectedVectorize: true, 315 }, 316 { 317 name: "one vectorizable prop changed (3)", 318 input: &models.Object{ 319 Class: "Car", 320 Properties: props, 321 }, 322 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 323 "brand": "best brand", 324 "power": 300, 325 "description": "a very great car", 326 "reviews": []string{ 327 "old a very great car", 328 "you should consider buying one", 329 }, 330 }, vector, vectors), 331 expectedVectorize: true, 332 }, 333 { 334 name: "all non-vectorizable props changed", 335 skipped: "description", 336 input: &models.Object{ 337 Class: "Car", 338 Properties: props, 339 }, 340 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 341 "brand": "best brand", 342 "power": 123, 343 "description": "old a very great car", 344 "reviews": []string{ 345 "a very great car", 346 "you should consider buying one", 347 }, 348 }, vector, vectors), 349 expectedVectorize: false, 350 }, 351 } 352 353 for _, test := range tests { 354 t.Run(test.name, func(t *testing.T) { 355 ic := &fakeClassConfig{ 356 skippedProperty: test.skipped, 357 } 358 359 client := &fakeClient{} 360 v := New(client) 361 362 vector, _, err := v.Object(context.Background(), test.input, test.comp, ic) 363 364 require.Nil(t, err) 365 if test.expectedVectorize { 366 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 367 assert.NotNil(t, client.lastInput) 368 } else { 369 assert.Equal(t, []float32{0, 0, 0, 0}, vector) 370 assert.Nil(t, client.lastInput) 371 } 372 }) 373 } 374 } 375 376 func TestVectorizingActions(t *testing.T) { 377 type testCase struct { 378 name string 379 input *models.Object 380 expectedClientCall []string 381 noindex string 382 excludedProperty string // to simulate a schema where property names aren't vectorized 383 excludedClass string // to simulate a schema where class names aren't vectorized 384 } 385 386 propsSchema := []*models.Property{ 387 { 388 Name: "brand", 389 DataType: schema.DataTypeText.PropString(), 390 }, 391 { 392 Name: "length", 393 DataType: schema.DataTypeInt.PropString(), 394 }, 395 { 396 Name: "review", 397 DataType: schema.DataTypeText.PropString(), 398 }, 399 } 400 401 tests := []testCase{ 402 { 403 name: "empty object", 404 input: &models.Object{ 405 Class: "Flight", 406 }, 407 expectedClientCall: []string{"flight"}, 408 }, 409 { 410 name: "object with one string prop", 411 input: &models.Object{ 412 Class: "Flight", 413 Properties: map[string]interface{}{ 414 "brand": "Mercedes", 415 }, 416 }, 417 expectedClientCall: []string{"flight brand mercedes"}, 418 }, 419 420 { 421 name: "object with one non-string prop", 422 input: &models.Object{ 423 Class: "Flight", 424 Properties: map[string]interface{}{ 425 "length": 300, 426 }, 427 }, 428 expectedClientCall: []string{"flight"}, 429 }, 430 431 { 432 name: "object with a mix of props", 433 input: &models.Object{ 434 Class: "Flight", 435 Properties: map[string]interface{}{ 436 "brand": "best brand", 437 "length": 300, 438 "review": "a very great flight", 439 }, 440 }, 441 expectedClientCall: []string{"flight brand best brand review a very great flight"}, 442 }, 443 } 444 445 for _, test := range tests { 446 t.Run(test.name, func(t *testing.T) { 447 client := &fakeClient{} 448 v := New(client) 449 450 ic := &fakeClassConfig{ 451 excludedProperty: test.excludedProperty, 452 skippedProperty: test.noindex, 453 vectorizeClassName: test.excludedClass != "Flight", 454 vectorizePropertyName: true, 455 } 456 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties) 457 vector, _, err := v.Object(context.Background(), test.input, comp, ic) 458 459 require.Nil(t, err) 460 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 461 expected := strings.Split(test.expectedClientCall[0], " ") 462 actual := strings.Split(client.lastInput[0], " ") 463 assert.ElementsMatch(t, expected, actual) 464 }) 465 } 466 } 467 468 func TestVectorizingSearchTerms(t *testing.T) { 469 type testCase struct { 470 name string 471 input []string 472 expectedClientCall []string 473 } 474 475 tests := []testCase{ 476 { 477 name: "single word", 478 input: []string{"car"}, 479 expectedClientCall: []string{"car"}, 480 }, 481 { 482 name: "multiple entries with multiple words", 483 input: []string{"car", "car brand"}, 484 expectedClientCall: []string{"car", "car brand"}, 485 }, 486 { 487 name: "multiple entries with upper casing", 488 input: []string{"Car", "Car Brand"}, 489 expectedClientCall: []string{"car", "car brand"}, 490 }, 491 { 492 name: "with camel cased words", 493 input: []string{"Car", "CarBrand"}, 494 expectedClientCall: []string{"car", "car brand"}, 495 }, 496 } 497 498 for _, test := range tests { 499 t.Run(test.name, func(t *testing.T) { 500 client := &fakeClient{} 501 v := New(client) 502 503 res, err := v.Corpi(context.Background(), test.input) 504 505 require.Nil(t, err) 506 assert.Equal(t, []float32{0, 1, 2, 3}, res) 507 assert.ElementsMatch(t, test.expectedClientCall, client.lastInput) 508 }) 509 } 510 }