github.com/weaviate/weaviate@v1.24.6/modules/text2vec-openai/vectorizer/objects_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "fmt" 17 "testing" 18 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 "github.com/weaviate/weaviate/entities/models" 22 "github.com/weaviate/weaviate/entities/moduletools" 23 "github.com/weaviate/weaviate/entities/schema" 24 ) 25 26 // These are mostly copy/pasted (with minimal additions) from the 27 // text2vec-contextionary module 28 func TestVectorizingObjects(t *testing.T) { 29 type testCase struct { 30 name string 31 input *models.Object 32 expectedClientCall string 33 expectedOpenAIType string 34 expectedOpenAIModel string 35 noindex string 36 excludedProperty string // to simulate a schema where property names aren't vectorized 37 excludedClass string // to simulate a schema where class names aren't vectorized 38 openAIType string 39 openAIModel string 40 openAIModelVersion string 41 } 42 43 propsSchema := []*models.Property{ 44 { 45 Name: "brand", 46 DataType: schema.DataTypeText.PropString(), 47 }, 48 { 49 Name: "power", 50 DataType: schema.DataTypeInt.PropString(), 51 }, 52 { 53 Name: "review", 54 DataType: schema.DataTypeText.PropString(), 55 }, 56 { 57 Name: "brandOfTheCar", 58 DataType: schema.DataTypeText.PropString(), 59 }, 60 { 61 Name: "reviews", 62 DataType: schema.DataTypeTextArray.PropString(), 63 }, 64 } 65 66 tests := []testCase{ 67 { 68 name: "empty object", 69 input: &models.Object{ 70 Class: "Car", 71 }, 72 openAIType: "text", 73 openAIModel: "ada", 74 expectedOpenAIType: "text", 75 expectedOpenAIModel: "ada", 76 expectedClientCall: "car", 77 }, 78 { 79 name: "object with one string prop", 80 input: &models.Object{ 81 Class: "Car", 82 Properties: map[string]interface{}{ 83 "brand": "Mercedes", 84 }, 85 }, 86 expectedClientCall: "car brand mercedes", 87 }, 88 { 89 name: "object with one non-string prop", 90 input: &models.Object{ 91 Class: "Car", 92 Properties: map[string]interface{}{ 93 "power": 300, 94 }, 95 }, 96 expectedClientCall: "car", 97 }, 98 { 99 name: "object with a mix of props", 100 input: &models.Object{ 101 Class: "Car", 102 Properties: map[string]interface{}{ 103 "brand": "best brand", 104 "power": 300, 105 "review": "a very great car", 106 }, 107 }, 108 expectedClientCall: "car brand best brand review a very great car", 109 }, 110 { 111 name: "with a noindexed property", 112 noindex: "review", 113 input: &models.Object{ 114 Class: "Car", 115 Properties: map[string]interface{}{ 116 "brand": "best brand", 117 "power": 300, 118 "review": "a very great car", 119 }, 120 }, 121 expectedClientCall: "car brand best brand", 122 }, 123 { 124 name: "with the class name not vectorized", 125 excludedClass: "Car", 126 input: &models.Object{ 127 Class: "Car", 128 Properties: map[string]interface{}{ 129 "brand": "best brand", 130 "power": 300, 131 "review": "a very great car", 132 }, 133 }, 134 expectedClientCall: "brand best brand review a very great car", 135 }, 136 { 137 name: "with a property name not vectorized", 138 excludedProperty: "review", 139 input: &models.Object{ 140 Class: "Car", 141 Properties: map[string]interface{}{ 142 "brand": "best brand", 143 "power": 300, 144 "review": "a very great car", 145 }, 146 }, 147 expectedClientCall: "car brand best brand a very great car", 148 }, 149 { 150 name: "with no schema labels vectorized", 151 excludedProperty: "review", 152 excludedClass: "Car", 153 input: &models.Object{ 154 Class: "Car", 155 Properties: map[string]interface{}{ 156 "review": "a very great car", 157 }, 158 }, 159 expectedClientCall: "a very great car", 160 }, 161 { 162 name: "with string/text arrays without propname or classname", 163 excludedProperty: "reviews", 164 excludedClass: "Car", 165 input: &models.Object{ 166 Class: "Car", 167 Properties: map[string]interface{}{ 168 "reviews": []string{ 169 "a very great car", 170 "you should consider buying one", 171 }, 172 }, 173 }, 174 expectedClientCall: "a very great car you should consider buying one", 175 }, 176 { 177 name: "with string/text arrays with propname and classname", 178 input: &models.Object{ 179 Class: "Car", 180 Properties: map[string]interface{}{ 181 "reviews": []string{ 182 "a very great car", 183 "you should consider buying one", 184 }, 185 }, 186 }, 187 expectedClientCall: "car reviews a very great car reviews you should consider buying one", 188 }, 189 { 190 name: "with compound class and prop names", 191 input: &models.Object{ 192 Class: "SuperCar", 193 Properties: map[string]interface{}{ 194 "brandOfTheCar": "best brand", 195 "power": 300, 196 "review": "a very great car", 197 }, 198 }, 199 expectedClientCall: "super car brand of the car best brand review a very great car", 200 }, 201 } 202 203 for _, test := range tests { 204 t.Run(test.name, func(t *testing.T) { 205 client := &fakeClient{} 206 207 v := New(client) 208 209 cfg := &fakeClassConfig{ 210 classConfig: map[string]interface{}{ 211 "vectorizeClassName": test.excludedClass != "Car", 212 "type": test.openAIType, 213 "model": test.openAIModel, 214 "modelVersion": test.openAIModelVersion, 215 }, 216 vectorizePropertyName: true, 217 skippedProperty: test.noindex, 218 excludedProperty: test.excludedProperty, 219 } 220 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties) 221 vector, _, err := v.Object(context.Background(), test.input, comp, cfg) 222 223 require.Nil(t, err) 224 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 225 assert.Equal(t, []string{test.expectedClientCall}, client.lastInput) 226 assert.Equal(t, test.expectedOpenAIType, client.lastConfig.Type) 227 assert.Equal(t, test.expectedOpenAIModel, client.lastConfig.Model) 228 }) 229 } 230 } 231 232 func TestClassSettings(t *testing.T) { 233 type testCase struct { 234 expectedBaseURL string 235 cfg moduletools.ClassConfig 236 } 237 tests := []testCase{ 238 { 239 cfg: fakeClassConfig{ 240 classConfig: make(map[string]interface{}), 241 }, 242 expectedBaseURL: DefaultBaseURL, 243 }, 244 { 245 cfg: fakeClassConfig{ 246 classConfig: map[string]interface{}{ 247 "baseURL": "https://proxy.weaviate.dev", 248 }, 249 }, 250 expectedBaseURL: "https://proxy.weaviate.dev", 251 }, 252 } 253 254 for _, tt := range tests { 255 ic := NewClassSettings(tt.cfg) 256 assert.Equal(t, tt.expectedBaseURL, ic.BaseURL()) 257 } 258 } 259 260 func TestVectorizingObjectWithDiff(t *testing.T) { 261 type testCase struct { 262 name string 263 input *models.Object 264 skipped string 265 comp moduletools.VectorizablePropsComparator 266 expectedVectorize bool 267 } 268 269 propsSchema := []*models.Property{ 270 { 271 Name: "brand", 272 DataType: schema.DataTypeText.PropString(), 273 }, 274 { 275 Name: "power", 276 DataType: schema.DataTypeInt.PropString(), 277 }, 278 { 279 Name: "description", 280 DataType: schema.DataTypeText.PropString(), 281 }, 282 { 283 Name: "reviews", 284 DataType: schema.DataTypeTextArray.PropString(), 285 }, 286 } 287 props := map[string]interface{}{ 288 "brand": "best brand", 289 "power": 300, 290 "description": "a very great car", 291 "reviews": []string{ 292 "a very great car", 293 "you should consider buying one", 294 }, 295 } 296 vector := []float32{0, 0, 0, 0} 297 var vectors models.Vectors 298 299 tests := []testCase{ 300 { 301 name: "noop comp", 302 input: &models.Object{ 303 Class: "Car", 304 Properties: props, 305 }, 306 comp: moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props), 307 expectedVectorize: true, 308 }, 309 { 310 name: "all props unchanged", 311 input: &models.Object{ 312 Class: "Car", 313 Properties: props, 314 }, 315 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors), 316 expectedVectorize: false, 317 }, 318 { 319 name: "one vectorizable prop changed (1)", 320 input: &models.Object{ 321 Class: "Car", 322 Properties: props, 323 }, 324 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 325 "brand": "old best brand", 326 "power": 300, 327 "description": "a very great car", 328 "reviews": []string{ 329 "a very great car", 330 "you should consider buying one", 331 }, 332 }, vector, vectors), 333 expectedVectorize: true, 334 }, 335 { 336 name: "one vectorizable prop changed (2)", 337 input: &models.Object{ 338 Class: "Car", 339 Properties: props, 340 }, 341 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 342 "brand": "best brand", 343 "power": 300, 344 "description": "old a very great car", 345 "reviews": []string{ 346 "a very great car", 347 "you should consider buying one", 348 }, 349 }, vector, vectors), 350 expectedVectorize: true, 351 }, 352 { 353 name: "one vectorizable prop changed (3)", 354 input: &models.Object{ 355 Class: "Car", 356 Properties: props, 357 }, 358 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 359 "brand": "best brand", 360 "power": 300, 361 "description": "a very great car", 362 "reviews": []string{ 363 "old a very great car", 364 "you should consider buying one", 365 }, 366 }, vector, vectors), 367 expectedVectorize: true, 368 }, 369 { 370 name: "all non-vectorizable props changed", 371 skipped: "description", 372 input: &models.Object{ 373 Class: "Car", 374 Properties: props, 375 }, 376 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 377 "brand": "best brand", 378 "power": 123, 379 "description": "old a very great car", 380 "reviews": []string{ 381 "a very great car", 382 "you should consider buying one", 383 }, 384 }, vector, vectors), 385 expectedVectorize: false, 386 }, 387 } 388 389 for _, test := range tests { 390 t.Run(test.name, func(t *testing.T) { 391 cfg := &fakeClassConfig{ 392 skippedProperty: test.skipped, 393 } 394 395 client := &fakeClient{} 396 v := New(client) 397 398 vector, _, err := v.Object(context.Background(), test.input, test.comp, cfg) 399 400 require.Nil(t, err) 401 if test.expectedVectorize { 402 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 403 assert.NotEmpty(t, client.lastInput) 404 } else { 405 assert.Equal(t, []float32{0, 0, 0, 0}, vector) 406 assert.Empty(t, client.lastInput) 407 } 408 }) 409 } 410 } 411 412 func TestValidateModelVersion(t *testing.T) { 413 type test struct { 414 model string 415 docType string 416 version string 417 possible bool 418 } 419 420 tests := []test{ 421 // 001 models 422 {"ada", "text", "001", true}, 423 {"ada", "code", "001", true}, 424 {"babbage", "text", "001", true}, 425 {"babbage", "code", "001", true}, 426 {"curie", "text", "001", true}, 427 {"curie", "code", "001", true}, 428 {"davinci", "text", "001", true}, 429 {"davinci", "code", "001", true}, 430 431 // 002 models 432 {"ada", "text", "002", true}, 433 {"davinci", "text", "002", true}, 434 {"ada", "code", "002", false}, 435 {"babbage", "text", "002", false}, 436 {"babbage", "code", "002", false}, 437 {"curie", "text", "002", false}, 438 {"curie", "code", "002", false}, 439 {"davinci", "code", "002", false}, 440 441 // 003 442 {"davinci", "text", "003", true}, 443 {"ada", "text", "003", false}, 444 {"babbage", "text", "003", false}, 445 446 // 004 447 {"davinci", "text", "004", false}, 448 {"ada", "text", "004", false}, 449 {"babbage", "text", "004", false}, 450 } 451 452 for _, test := range tests { 453 name := fmt.Sprintf("model=%s docType=%s version=%s", test.model, test.docType, test.version) 454 t.Run(name, func(t *testing.T) { 455 err := (&classSettings{}).validateModelVersion(test.version, test.model, test.docType) 456 if test.possible { 457 assert.Nil(t, err, "this combination should be possible") 458 } else { 459 assert.NotNil(t, err, "this combination should not be possible") 460 } 461 }) 462 } 463 } 464 465 func TestPickDefaultModelVersion(t *testing.T) { 466 t.Run("ada with text", func(t *testing.T) { 467 version := PickDefaultModelVersion("ada", "text") 468 assert.Equal(t, "002", version) 469 }) 470 471 t.Run("ada with code", func(t *testing.T) { 472 version := PickDefaultModelVersion("ada", "code") 473 assert.Equal(t, "001", version) 474 }) 475 476 t.Run("with curie", func(t *testing.T) { 477 version := PickDefaultModelVersion("curie", "text") 478 assert.Equal(t, "001", version) 479 }) 480 }