github.com/weaviate/weaviate@v1.24.6/modules/text2vec-jinaai/vectorizer/objects_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "testing" 17 18 "github.com/stretchr/testify/assert" 19 "github.com/stretchr/testify/require" 20 "github.com/weaviate/weaviate/entities/models" 21 "github.com/weaviate/weaviate/entities/moduletools" 22 "github.com/weaviate/weaviate/entities/schema" 23 ) 24 25 // These are mostly copy/pasted (with minimal additions) from the 26 // text2vec-contextionary module 27 func TestVectorizingObjects(t *testing.T) { 28 type testCase struct { 29 name string 30 input *models.Object 31 expectedClientCall string 32 expectedJinaAIModel string 33 noindex string 34 excludedProperty string // to simulate a schema where property names aren't vectorized 35 excludedClass string // to simulate a schema where class names aren't vectorized 36 jinaAIModel string 37 } 38 39 propsSchema := []*models.Property{ 40 { 41 Name: "brand", 42 DataType: schema.DataTypeText.PropString(), 43 }, 44 { 45 Name: "power", 46 DataType: schema.DataTypeInt.PropString(), 47 }, 48 { 49 Name: "review", 50 DataType: schema.DataTypeText.PropString(), 51 }, 52 { 53 Name: "brandOfTheCar", 54 DataType: schema.DataTypeText.PropString(), 55 }, 56 { 57 Name: "reviews", 58 DataType: schema.DataTypeTextArray.PropString(), 59 }, 60 } 61 62 tests := []testCase{ 63 { 64 name: "empty object", 65 input: &models.Object{ 66 Class: "Car", 67 }, 68 jinaAIModel: "jina-embedding-v2", 69 expectedJinaAIModel: "jina-embedding-v2", 70 expectedClientCall: "car", 71 }, 72 { 73 name: "object with one string prop", 74 input: &models.Object{ 75 Class: "Car", 76 Properties: map[string]interface{}{ 77 "brand": "Mercedes", 78 }, 79 }, 80 expectedClientCall: "car brand mercedes", 81 }, 82 { 83 name: "object with one non-string prop", 84 input: &models.Object{ 85 Class: "Car", 86 Properties: map[string]interface{}{ 87 "power": 300, 88 }, 89 }, 90 expectedClientCall: "car", 91 }, 92 { 93 name: "object with a mix of props", 94 input: &models.Object{ 95 Class: "Car", 96 Properties: map[string]interface{}{ 97 "brand": "best brand", 98 "power": 300, 99 "review": "a very great car", 100 }, 101 }, 102 expectedClientCall: "car brand best brand review a very great car", 103 }, 104 { 105 name: "with a noindexed property", 106 noindex: "review", 107 input: &models.Object{ 108 Class: "Car", 109 Properties: map[string]interface{}{ 110 "brand": "best brand", 111 "power": 300, 112 "review": "a very great car", 113 }, 114 }, 115 expectedClientCall: "car brand best brand", 116 }, 117 { 118 name: "with the class name not vectorized", 119 excludedClass: "Car", 120 input: &models.Object{ 121 Class: "Car", 122 Properties: map[string]interface{}{ 123 "brand": "best brand", 124 "power": 300, 125 "review": "a very great car", 126 }, 127 }, 128 expectedClientCall: "brand best brand review a very great car", 129 }, 130 { 131 name: "with a property name not vectorized", 132 excludedProperty: "review", 133 input: &models.Object{ 134 Class: "Car", 135 Properties: map[string]interface{}{ 136 "brand": "best brand", 137 "power": 300, 138 "review": "a very great car", 139 }, 140 }, 141 expectedClientCall: "car brand best brand a very great car", 142 }, 143 { 144 name: "with no schema labels vectorized", 145 excludedProperty: "review", 146 excludedClass: "Car", 147 input: &models.Object{ 148 Class: "Car", 149 Properties: map[string]interface{}{ 150 "review": "a very great car", 151 }, 152 }, 153 expectedClientCall: "a very great car", 154 }, 155 { 156 name: "with string/text arrays without propname or classname", 157 excludedProperty: "reviews", 158 excludedClass: "Car", 159 input: &models.Object{ 160 Class: "Car", 161 Properties: map[string]interface{}{ 162 "reviews": []string{ 163 "a very great car", 164 "you should consider buying one", 165 }, 166 }, 167 }, 168 expectedClientCall: "a very great car you should consider buying one", 169 }, 170 { 171 name: "with string/text arrays with propname and classname", 172 input: &models.Object{ 173 Class: "Car", 174 Properties: map[string]interface{}{ 175 "reviews": []string{ 176 "a very great car", 177 "you should consider buying one", 178 }, 179 }, 180 }, 181 expectedClientCall: "car reviews a very great car reviews you should consider buying one", 182 }, 183 { 184 name: "with compound class and prop names", 185 input: &models.Object{ 186 Class: "SuperCar", 187 Properties: map[string]interface{}{ 188 "brandOfTheCar": "best brand", 189 "power": 300, 190 "review": "a very great car", 191 }, 192 }, 193 expectedClientCall: "super car brand of the car best brand review a very great car", 194 }, 195 } 196 197 for _, test := range tests { 198 t.Run(test.name, func(t *testing.T) { 199 client := &fakeClient{} 200 201 v := New(client) 202 203 ic := &fakeClassConfig{ 204 excludedProperty: test.excludedProperty, 205 skippedProperty: test.noindex, 206 vectorizeClassName: test.excludedClass != "Car", 207 jinaAIModel: test.jinaAIModel, 208 vectorizePropertyName: true, 209 } 210 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties) 211 vector, _, err := v.Object(context.Background(), test.input, comp, ic) 212 213 require.Nil(t, err) 214 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 215 assert.Equal(t, []string{test.expectedClientCall}, client.lastInput) 216 assert.Equal(t, client.lastConfig.Model, test.expectedJinaAIModel) 217 }) 218 } 219 } 220 221 func TestClassSettings(t *testing.T) { 222 type testCase struct { 223 expectedBaseURL string 224 cfg moduletools.ClassConfig 225 } 226 tests := []testCase{ 227 { 228 cfg: fakeClassConfig{ 229 classConfig: make(map[string]interface{}), 230 }, 231 expectedBaseURL: DefaultBaseURL, 232 }, 233 { 234 cfg: fakeClassConfig{ 235 classConfig: map[string]interface{}{ 236 "baseURL": "https://proxy.weaviate.dev", 237 }, 238 }, 239 expectedBaseURL: "https://proxy.weaviate.dev", 240 }, 241 } 242 243 for _, tt := range tests { 244 ic := NewClassSettings(tt.cfg) 245 assert.Equal(t, tt.expectedBaseURL, ic.BaseURL()) 246 } 247 } 248 249 func TestVectorizingObjectWithDiff(t *testing.T) { 250 type testCase struct { 251 name string 252 input *models.Object 253 skipped string 254 comp moduletools.VectorizablePropsComparator 255 expectedVectorize bool 256 } 257 258 propsSchema := []*models.Property{ 259 { 260 Name: "brand", 261 DataType: schema.DataTypeText.PropString(), 262 }, 263 { 264 Name: "power", 265 DataType: schema.DataTypeInt.PropString(), 266 }, 267 { 268 Name: "description", 269 DataType: schema.DataTypeText.PropString(), 270 }, 271 { 272 Name: "reviews", 273 DataType: schema.DataTypeTextArray.PropString(), 274 }, 275 } 276 props := map[string]interface{}{ 277 "brand": "best brand", 278 "power": 300, 279 "description": "a very great car", 280 "reviews": []string{ 281 "a very great car", 282 "you should consider buying one", 283 }, 284 } 285 vector := []float32{0, 0, 0, 0} 286 var vectors models.Vectors 287 288 tests := []testCase{ 289 { 290 name: "noop comp", 291 input: &models.Object{ 292 Class: "Car", 293 Properties: props, 294 }, 295 comp: moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props), 296 expectedVectorize: true, 297 }, 298 { 299 name: "all props unchanged", 300 input: &models.Object{ 301 Class: "Car", 302 Properties: props, 303 }, 304 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors), 305 expectedVectorize: false, 306 }, 307 { 308 name: "one vectorizable prop changed (1)", 309 input: &models.Object{ 310 Class: "Car", 311 Properties: props, 312 }, 313 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 314 "brand": "old best brand", 315 "power": 300, 316 "description": "a very great car", 317 "reviews": []string{ 318 "a very great car", 319 "you should consider buying one", 320 }, 321 }, vector, vectors), 322 expectedVectorize: true, 323 }, 324 { 325 name: "one vectorizable prop changed (2)", 326 input: &models.Object{ 327 Class: "Car", 328 Properties: props, 329 }, 330 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 331 "brand": "best brand", 332 "power": 300, 333 "description": "old a very great car", 334 "reviews": []string{ 335 "a very great car", 336 "you should consider buying one", 337 }, 338 }, vector, vectors), 339 expectedVectorize: true, 340 }, 341 { 342 name: "one vectorizable prop changed (3)", 343 input: &models.Object{ 344 Class: "Car", 345 Properties: props, 346 }, 347 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 348 "brand": "best brand", 349 "power": 300, 350 "description": "a very great car", 351 "reviews": []string{ 352 "old a very great car", 353 "you should consider buying one", 354 }, 355 }, vector, vectors), 356 expectedVectorize: true, 357 }, 358 { 359 name: "all non-vectorizable props changed", 360 skipped: "description", 361 input: &models.Object{ 362 Class: "Car", 363 Properties: props, 364 }, 365 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 366 "brand": "best brand", 367 "power": 123, 368 "description": "old a very great car", 369 "reviews": []string{ 370 "a very great car", 371 "you should consider buying one", 372 }, 373 }, vector, vectors), 374 expectedVectorize: false, 375 }, 376 } 377 378 for _, test := range tests { 379 t.Run(test.name, func(t *testing.T) { 380 ic := &fakeClassConfig{ 381 skippedProperty: test.skipped, 382 } 383 384 client := &fakeClient{} 385 v := New(client) 386 387 vector, _, err := v.Object(context.Background(), test.input, test.comp, ic) 388 389 require.Nil(t, err) 390 if test.expectedVectorize { 391 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 392 assert.NotEmpty(t, client.lastInput) 393 } else { 394 assert.Equal(t, []float32{0, 0, 0, 0}, vector) 395 assert.Empty(t, client.lastInput) 396 } 397 }) 398 } 399 }