github.com/weaviate/weaviate@v1.24.6/modules/text2vec-transformers/vectorizer/objects_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "strings" 17 "testing" 18 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 "github.com/weaviate/weaviate/entities/models" 22 "github.com/weaviate/weaviate/entities/moduletools" 23 "github.com/weaviate/weaviate/entities/schema" 24 ) 25 26 // These are mostly copy/pasted (with minimal additions) from the 27 // text2vec-contextionary module 28 func TestVectorizingObjects(t *testing.T) { 29 type testCase struct { 30 name string 31 input *models.Object 32 expectedClientCall string 33 expectedPoolingStrategy string 34 noindex string 35 excludedProperty string // to simulate a schema where property names aren't vectorized 36 excludedClass string // to simulate a schema where class names aren't vectorized 37 poolingStrategy string 38 } 39 40 propsSchema := []*models.Property{ 41 { 42 Name: "brand", 43 DataType: schema.DataTypeText.PropString(), 44 }, 45 { 46 Name: "power", 47 DataType: schema.DataTypeInt.PropString(), 48 }, 49 { 50 Name: "review", 51 DataType: schema.DataTypeText.PropString(), 52 }, 53 { 54 Name: "brandOfTheCar", 55 DataType: schema.DataTypeText.PropString(), 56 }, 57 { 58 Name: "reviews", 59 DataType: schema.DataTypeTextArray.PropString(), 60 }, 61 } 62 63 tests := []testCase{ 64 { 65 name: "empty object", 66 input: &models.Object{ 67 Class: "Car", 68 }, 69 poolingStrategy: "cls", 70 expectedPoolingStrategy: "cls", 71 expectedClientCall: "car", 72 }, 73 { 74 name: "object with one string prop", 75 input: &models.Object{ 76 Class: "Car", 77 Properties: map[string]interface{}{ 78 "brand": "Mercedes", 79 }, 80 }, 81 expectedClientCall: "car brand mercedes", 82 }, 83 84 { 85 name: "object with one non-string prop", 86 input: &models.Object{ 87 Class: "Car", 88 Properties: map[string]interface{}{ 89 "power": 300, 90 }, 91 }, 92 expectedClientCall: "car", 93 }, 94 95 { 96 name: "object with a mix of props", 97 input: &models.Object{ 98 Class: "Car", 99 Properties: map[string]interface{}{ 100 "brand": "best brand", 101 "power": 300, 102 "review": "a very great car", 103 }, 104 }, 105 expectedClientCall: "car brand best brand review a very great car", 106 }, 107 { 108 name: "with a noindexed property", 109 noindex: "review", 110 input: &models.Object{ 111 Class: "Car", 112 Properties: map[string]interface{}{ 113 "brand": "best brand", 114 "power": 300, 115 "review": "a very great car", 116 }, 117 }, 118 expectedClientCall: "car brand best brand", 119 }, 120 121 { 122 name: "with the class name not vectorized", 123 excludedClass: "Car", 124 input: &models.Object{ 125 Class: "Car", 126 Properties: map[string]interface{}{ 127 "brand": "best brand", 128 "power": 300, 129 "review": "a very great car", 130 }, 131 }, 132 expectedClientCall: "brand best brand review a very great car", 133 }, 134 135 { 136 name: "with a property name not vectorized", 137 excludedProperty: "review", 138 input: &models.Object{ 139 Class: "Car", 140 Properties: map[string]interface{}{ 141 "brand": "best brand", 142 "power": 300, 143 "review": "a very great car", 144 }, 145 }, 146 expectedClientCall: "car brand best brand a very great car", 147 }, 148 149 { 150 name: "with no schema labels vectorized", 151 excludedProperty: "review", 152 excludedClass: "Car", 153 input: &models.Object{ 154 Class: "Car", 155 Properties: map[string]interface{}{ 156 "review": "a very great car", 157 }, 158 }, 159 expectedClientCall: "a very great car", 160 }, 161 162 { 163 name: "with string/text arrays without propname or classname", 164 excludedProperty: "reviews", 165 excludedClass: "Car", 166 input: &models.Object{ 167 Class: "Car", 168 Properties: map[string]interface{}{ 169 "reviews": []string{ 170 "a very great car", 171 "you should consider buying one", 172 }, 173 }, 174 }, 175 expectedClientCall: "a very great car you should consider buying one", 176 }, 177 178 { 179 name: "with string/text arrays with propname and classname", 180 input: &models.Object{ 181 Class: "Car", 182 Properties: map[string]interface{}{ 183 "reviews": []string{ 184 "a very great car", 185 "you should consider buying one", 186 }, 187 }, 188 }, 189 expectedClientCall: "car reviews a very great car reviews you should consider buying one", 190 }, 191 192 { 193 name: "with compound class and prop names", 194 input: &models.Object{ 195 Class: "SuperCar", 196 Properties: map[string]interface{}{ 197 "brandOfTheCar": "best brand", 198 "power": 300, 199 "review": "a very great car", 200 }, 201 }, 202 expectedClientCall: "super car brand of the car best brand review a very great car", 203 }, 204 } 205 206 for _, test := range tests { 207 t.Run(test.name, func(t *testing.T) { 208 client := &fakeClient{} 209 210 v := New(client) 211 212 ic := &fakeClassConfig{ 213 excludedProperty: test.excludedProperty, 214 skippedProperty: test.noindex, 215 vectorizeClassName: test.excludedClass != "Car", 216 poolingStrategy: test.poolingStrategy, 217 vectorizePropertyName: true, 218 } 219 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties) 220 vector, _, err := v.Object(context.Background(), test.input, comp, ic) 221 222 require.Nil(t, err) 223 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 224 expected := strings.Split(test.expectedClientCall, " ") 225 actual := strings.Split(client.lastInput, " ") 226 assert.Equal(t, expected, actual) 227 assert.Equal(t, client.lastConfig.PoolingStrategy, test.expectedPoolingStrategy) 228 }) 229 } 230 } 231 232 func TestVectorizingObjectsWithDiff(t *testing.T) { 233 type testCase struct { 234 name string 235 input *models.Object 236 skipped string 237 comp moduletools.VectorizablePropsComparator 238 expectedVectorize bool 239 } 240 241 propsSchema := []*models.Property{ 242 { 243 Name: "brand", 244 DataType: schema.DataTypeText.PropString(), 245 }, 246 { 247 Name: "power", 248 DataType: schema.DataTypeInt.PropString(), 249 }, 250 { 251 Name: "description", 252 DataType: schema.DataTypeText.PropString(), 253 }, 254 { 255 Name: "reviews", 256 DataType: schema.DataTypeTextArray.PropString(), 257 }, 258 } 259 props := map[string]interface{}{ 260 "brand": "best brand", 261 "power": 300, 262 "description": "a very great car", 263 "reviews": []string{ 264 "a very great car", 265 "you should consider buying one", 266 }, 267 } 268 vector := []float32{0, 0, 0, 0} 269 var vectors models.Vectors 270 271 tests := []testCase{ 272 { 273 name: "noop comp", 274 input: &models.Object{ 275 Class: "Car", 276 Properties: props, 277 }, 278 comp: moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props), 279 expectedVectorize: true, 280 }, 281 { 282 name: "all props unchanged", 283 input: &models.Object{ 284 Class: "Car", 285 Properties: props, 286 }, 287 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors), 288 expectedVectorize: false, 289 }, 290 { 291 name: "one vectorizable prop changed (1)", 292 input: &models.Object{ 293 Class: "Car", 294 Properties: props, 295 }, 296 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 297 "brand": "old best brand", 298 "power": 300, 299 "description": "a very great car", 300 "reviews": []string{ 301 "a very great car", 302 "you should consider buying one", 303 }, 304 }, vector, vectors), 305 expectedVectorize: true, 306 }, 307 { 308 name: "one vectorizable prop changed (2)", 309 input: &models.Object{ 310 Class: "Car", 311 Properties: props, 312 }, 313 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 314 "brand": "best brand", 315 "power": 300, 316 "description": "old a very great car", 317 "reviews": []string{ 318 "a very great car", 319 "you should consider buying one", 320 }, 321 }, vector, vectors), 322 expectedVectorize: true, 323 }, 324 { 325 name: "one vectorizable prop changed (3)", 326 input: &models.Object{ 327 Class: "Car", 328 Properties: props, 329 }, 330 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 331 "brand": "best brand", 332 "power": 300, 333 "description": "a very great car", 334 "reviews": []string{ 335 "old a very great car", 336 "you should consider buying one", 337 }, 338 }, vector, vectors), 339 expectedVectorize: true, 340 }, 341 { 342 name: "all non-vectorizable props changed", 343 skipped: "description", 344 input: &models.Object{ 345 Class: "Car", 346 Properties: props, 347 }, 348 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 349 "brand": "best brand", 350 "power": 123, 351 "description": "old a very great car", 352 "reviews": []string{ 353 "a very great car", 354 "you should consider buying one", 355 }, 356 }, vector, vectors), 357 expectedVectorize: false, 358 }, 359 } 360 361 for _, test := range tests { 362 t.Run(test.name, func(t *testing.T) { 363 ic := &fakeClassConfig{ 364 skippedProperty: test.skipped, 365 } 366 367 client := &fakeClient{} 368 v := New(client) 369 370 vector, _, err := v.Object(context.Background(), test.input, test.comp, ic) 371 372 require.Nil(t, err) 373 if test.expectedVectorize { 374 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 375 assert.NotEmpty(t, client.lastInput) 376 } else { 377 assert.Equal(t, []float32{0, 0, 0, 0}, vector) 378 assert.Empty(t, client.lastInput) 379 } 380 }) 381 } 382 }