github.com/weaviate/weaviate@v1.24.6/modules/text2vec-huggingface/vectorizer/objects_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "strings" 17 "testing" 18 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 "github.com/weaviate/weaviate/entities/models" 22 "github.com/weaviate/weaviate/entities/moduletools" 23 "github.com/weaviate/weaviate/entities/schema" 24 ) 25 26 // These are mostly copy/pasted (with minimal additions) from the 27 // text2vec-contextionary module 28 func TestVectorizingObjects(t *testing.T) { 29 type testCase struct { 30 name string 31 input *models.Object 32 expectedClientCall string 33 expectedHuggingFaceModel string 34 noindex string 35 excludedProperty string // to simulate a schema where property names aren't vectorized 36 excludedClass string // to simulate a schema where class names aren't vectorized 37 passageModel string 38 endpointURL string 39 } 40 41 propsSchema := []*models.Property{ 42 { 43 Name: "brand", 44 DataType: schema.DataTypeText.PropString(), 45 }, 46 { 47 Name: "power", 48 DataType: schema.DataTypeInt.PropString(), 49 }, 50 { 51 Name: "review", 52 DataType: schema.DataTypeText.PropString(), 53 }, 54 { 55 Name: "brandOfTheCar", 56 DataType: schema.DataTypeText.PropString(), 57 }, 58 { 59 Name: "reviews", 60 DataType: schema.DataTypeTextArray.PropString(), 61 }, 62 } 63 64 tests := []testCase{ 65 { 66 name: "empty object", 67 input: &models.Object{ 68 Class: "Car", 69 }, 70 passageModel: "sentence-transformers/gtr-t5-xl", 71 expectedHuggingFaceModel: "sentence-transformers/gtr-t5-xl", 72 expectedClientCall: "car", 73 }, 74 { 75 name: "object with one string prop", 76 input: &models.Object{ 77 Class: "Car", 78 Properties: map[string]interface{}{ 79 "brand": "Mercedes", 80 }, 81 }, 82 expectedClientCall: "car brand mercedes", 83 }, 84 { 85 name: "object with one non-string prop", 86 input: &models.Object{ 87 Class: "Car", 88 Properties: map[string]interface{}{ 89 "power": 300, 90 }, 91 }, 92 expectedClientCall: "car", 93 }, 94 { 95 name: "object with a mix of props", 96 input: &models.Object{ 97 Class: "Car", 98 Properties: map[string]interface{}{ 99 "brand": "best brand", 100 "power": 300, 101 "review": "a very great car", 102 }, 103 }, 104 expectedClientCall: "car brand best brand review a very great car", 105 }, 106 { 107 name: "with a noindexed property", 108 noindex: "review", 109 input: &models.Object{ 110 Class: "Car", 111 Properties: map[string]interface{}{ 112 "brand": "best brand", 113 "power": 300, 114 "review": "a very great car", 115 }, 116 }, 117 expectedClientCall: "car brand best brand", 118 }, 119 { 120 name: "with the class name not vectorized", 121 excludedClass: "Car", 122 input: &models.Object{ 123 Class: "Car", 124 Properties: map[string]interface{}{ 125 "brand": "best brand", 126 "power": 300, 127 "review": "a very great car", 128 }, 129 }, 130 expectedClientCall: "brand best brand review a very great car", 131 }, 132 { 133 name: "with a property name not vectorized", 134 excludedProperty: "review", 135 input: &models.Object{ 136 Class: "Car", 137 Properties: map[string]interface{}{ 138 "brand": "best brand", 139 "power": 300, 140 "review": "a very great car", 141 }, 142 }, 143 expectedClientCall: "car brand best brand a very great car", 144 }, 145 { 146 name: "with no schema labels vectorized", 147 excludedProperty: "review", 148 excludedClass: "Car", 149 input: &models.Object{ 150 Class: "Car", 151 Properties: map[string]interface{}{ 152 "review": "a very great car", 153 }, 154 }, 155 expectedClientCall: "a very great car", 156 }, 157 { 158 name: "with string/text arrays without propname or classname", 159 excludedProperty: "reviews", 160 excludedClass: "Car", 161 input: &models.Object{ 162 Class: "Car", 163 Properties: map[string]interface{}{ 164 "reviews": []string{ 165 "a very great car", 166 "you should consider buying one", 167 }, 168 }, 169 }, 170 expectedClientCall: "a very great car you should consider buying one", 171 }, 172 { 173 name: "with string/text arrays with propname and classname", 174 input: &models.Object{ 175 Class: "Car", 176 Properties: map[string]interface{}{ 177 "reviews": []string{ 178 "a very great car", 179 "you should consider buying one", 180 }, 181 }, 182 }, 183 expectedClientCall: "car reviews a very great car reviews you should consider buying one", 184 }, 185 { 186 name: "with compound class and prop names", 187 input: &models.Object{ 188 Class: "SuperCar", 189 Properties: map[string]interface{}{ 190 "brandOfTheCar": "best brand", 191 "power": 300, 192 "review": "a very great car", 193 }, 194 }, 195 expectedClientCall: "super car brand of the car best brand review a very great car", 196 }, 197 { 198 name: "empty object with HF Inference Endpoint", 199 input: &models.Object{ 200 Class: "Car", 201 }, 202 endpointURL: "https://url.cloud", 203 expectedClientCall: "car", 204 }, 205 } 206 207 for _, test := range tests { 208 t.Run(test.name, func(t *testing.T) { 209 client := &fakeClient{} 210 211 v := New(client) 212 213 ic := &fakeClassConfig{ 214 excludedProperty: test.excludedProperty, 215 skippedProperty: test.noindex, 216 vectorizeClassName: test.excludedClass != "Car", 217 passageModel: test.passageModel, 218 endpointURL: test.endpointURL, 219 vectorizePropertyName: true, 220 } 221 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties) 222 vector, _, err := v.Object(context.Background(), test.input, comp, ic) 223 224 require.Nil(t, err) 225 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 226 expected := strings.Split(test.expectedClientCall, " ") 227 actual := strings.Split(client.lastInput, " ") 228 assert.Equal(t, expected, actual) 229 if test.expectedHuggingFaceModel != "" { 230 assert.Equal(t, test.expectedHuggingFaceModel, client.lastConfig.Model) 231 } 232 }) 233 } 234 } 235 236 func TestVectorizingObjectsWithDiff(t *testing.T) { 237 type testCase struct { 238 name string 239 input *models.Object 240 skipped string 241 comp moduletools.VectorizablePropsComparator 242 expectedVectorize bool 243 } 244 245 propsSchema := []*models.Property{ 246 { 247 Name: "brand", 248 DataType: schema.DataTypeText.PropString(), 249 }, 250 { 251 Name: "power", 252 DataType: schema.DataTypeInt.PropString(), 253 }, 254 { 255 Name: "description", 256 DataType: schema.DataTypeText.PropString(), 257 }, 258 { 259 Name: "reviews", 260 DataType: schema.DataTypeTextArray.PropString(), 261 }, 262 } 263 props := map[string]interface{}{ 264 "brand": "best brand", 265 "power": 300, 266 "description": "a very great car", 267 "reviews": []string{ 268 "a very great car", 269 "you should consider buying one", 270 }, 271 } 272 vector := []float32{0, 0, 0, 0} 273 var vectors models.Vectors 274 275 tests := []testCase{ 276 { 277 name: "noop comp", 278 input: &models.Object{ 279 Class: "Car", 280 Properties: props, 281 }, 282 comp: moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props), 283 expectedVectorize: true, 284 }, 285 { 286 name: "all props unchanged", 287 input: &models.Object{ 288 Class: "Car", 289 Properties: props, 290 }, 291 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors), 292 expectedVectorize: false, 293 }, 294 { 295 name: "one vectorizable prop changed (1)", 296 input: &models.Object{ 297 Class: "Car", 298 Properties: props, 299 }, 300 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 301 "brand": "old best brand", 302 "power": 300, 303 "description": "a very great car", 304 "reviews": []string{ 305 "a very great car", 306 "you should consider buying one", 307 }, 308 }, vector, vectors), 309 expectedVectorize: true, 310 }, 311 { 312 name: "one vectorizable prop changed (2)", 313 input: &models.Object{ 314 Class: "Car", 315 Properties: props, 316 }, 317 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 318 "brand": "best brand", 319 "power": 300, 320 "description": "old a very great car", 321 "reviews": []string{ 322 "a very great car", 323 "you should consider buying one", 324 }, 325 }, vector, vectors), 326 expectedVectorize: true, 327 }, 328 { 329 name: "one vectorizable prop changed (3)", 330 input: &models.Object{ 331 Class: "Car", 332 Properties: props, 333 }, 334 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 335 "brand": "best brand", 336 "power": 300, 337 "description": "a very great car", 338 "reviews": []string{ 339 "old a very great car", 340 "you should consider buying one", 341 }, 342 }, vector, vectors), 343 expectedVectorize: true, 344 }, 345 { 346 name: "all non-vectorizable props changed", 347 skipped: "description", 348 input: &models.Object{ 349 Class: "Car", 350 Properties: props, 351 }, 352 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 353 "brand": "best brand", 354 "power": 123, 355 "description": "old a very great car", 356 "reviews": []string{ 357 "a very great car", 358 "you should consider buying one", 359 }, 360 }, vector, vectors), 361 expectedVectorize: false, 362 }, 363 } 364 365 for _, test := range tests { 366 t.Run(test.name, func(t *testing.T) { 367 ic := &fakeClassConfig{ 368 skippedProperty: test.skipped, 369 } 370 371 client := &fakeClient{} 372 v := New(client) 373 374 vector, _, err := v.Object(context.Background(), test.input, test.comp, ic) 375 376 require.Nil(t, err) 377 if test.expectedVectorize { 378 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 379 assert.NotEmpty(t, client.lastInput) 380 } else { 381 assert.Equal(t, []float32{0, 0, 0, 0}, vector) 382 assert.Empty(t, client.lastInput) 383 } 384 }) 385 } 386 }