github.com/weaviate/weaviate@v1.24.6/modules/text2vec-aws/vectorizer/objects_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "strings" 17 "testing" 18 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 "github.com/weaviate/weaviate/entities/models" 22 "github.com/weaviate/weaviate/entities/moduletools" 23 "github.com/weaviate/weaviate/entities/schema" 24 ) 25 26 // These are mostly copy/pasted (with minimal additions) from the 27 // text2vec-contextionary module 28 func TestVectorizingObjects(t *testing.T) { 29 type testCase struct { 30 name string 31 input *models.Object 32 expectedClientCall string 33 noindex string 34 excludedProperty string // to simulate a schema where property names aren't vectorized 35 excludedClass string // to simulate a schema where class names aren't vectorized 36 awsModel string 37 } 38 39 propsSchema := []*models.Property{ 40 { 41 Name: "brand", 42 DataType: schema.DataTypeText.PropString(), 43 }, 44 { 45 Name: "power", 46 DataType: schema.DataTypeInt.PropString(), 47 }, 48 { 49 Name: "review", 50 DataType: schema.DataTypeText.PropString(), 51 }, 52 { 53 Name: "brandOfTheCar", 54 DataType: schema.DataTypeText.PropString(), 55 }, 56 { 57 Name: "reviews", 58 DataType: schema.DataTypeTextArray.PropString(), 59 }, 60 } 61 62 tests := []testCase{ 63 { 64 name: "empty object", 65 input: &models.Object{ 66 Class: "Car", 67 }, 68 awsModel: "large", 69 expectedClientCall: "car", 70 }, 71 { 72 name: "object with one string prop", 73 input: &models.Object{ 74 Class: "Car", 75 Properties: map[string]interface{}{ 76 "brand": "Mercedes", 77 }, 78 }, 79 expectedClientCall: "car brand mercedes", 80 }, 81 { 82 name: "object with one non-string prop", 83 input: &models.Object{ 84 Class: "Car", 85 Properties: map[string]interface{}{ 86 "power": 300, 87 }, 88 }, 89 expectedClientCall: "car", 90 }, 91 { 92 name: "object with a mix of props", 93 input: &models.Object{ 94 Class: "Car", 95 Properties: map[string]interface{}{ 96 "brand": "best brand", 97 "power": 300, 98 "review": "a very great car", 99 }, 100 }, 101 expectedClientCall: "car brand best brand review a very great car", 102 }, 103 { 104 name: "with a noindexed property", 105 noindex: "review", 106 input: &models.Object{ 107 Class: "Car", 108 Properties: map[string]interface{}{ 109 "brand": "best brand", 110 "power": 300, 111 "review": "a very great car", 112 }, 113 }, 114 expectedClientCall: "car brand best brand", 115 }, 116 { 117 name: "with the class name not vectorized", 118 excludedClass: "Car", 119 input: &models.Object{ 120 Class: "Car", 121 Properties: map[string]interface{}{ 122 "brand": "best brand", 123 "power": 300, 124 "review": "a very great car", 125 }, 126 }, 127 expectedClientCall: "brand best brand review a very great car", 128 }, 129 { 130 name: "with a property name not vectorized", 131 excludedProperty: "review", 132 input: &models.Object{ 133 Class: "Car", 134 Properties: map[string]interface{}{ 135 "brand": "best brand", 136 "power": 300, 137 "review": "a very great car", 138 }, 139 }, 140 expectedClientCall: "car brand best brand a very great car", 141 }, 142 { 143 name: "with no schema labels vectorized", 144 excludedProperty: "review", 145 excludedClass: "Car", 146 input: &models.Object{ 147 Class: "Car", 148 Properties: map[string]interface{}{ 149 "review": "a very great car", 150 }, 151 }, 152 expectedClientCall: "a very great car", 153 }, 154 { 155 name: "with string/text arrays without propname or classname", 156 excludedProperty: "reviews", 157 excludedClass: "Car", 158 input: &models.Object{ 159 Class: "Car", 160 Properties: map[string]interface{}{ 161 "reviews": []string{ 162 "a very great car", 163 "you should consider buying one", 164 }, 165 }, 166 }, 167 expectedClientCall: "a very great car you should consider buying one", 168 }, 169 { 170 name: "with string/text arrays with propname and classname", 171 input: &models.Object{ 172 Class: "Car", 173 Properties: map[string]interface{}{ 174 "reviews": []string{ 175 "a very great car", 176 "you should consider buying one", 177 }, 178 }, 179 }, 180 expectedClientCall: "car reviews a very great car reviews you should consider buying one", 181 }, 182 { 183 name: "with compound class and prop names", 184 input: &models.Object{ 185 Class: "SuperCar", 186 Properties: map[string]interface{}{ 187 "brandOfTheCar": "best brand", 188 "power": 300, 189 "review": "a very great car", 190 }, 191 }, 192 expectedClientCall: "super car brand of the car best brand review a very great car", 193 }, 194 } 195 196 for _, test := range tests { 197 t.Run(test.name, func(t *testing.T) { 198 client := &fakeClient{} 199 200 v := New(client) 201 202 ic := &fakeClassConfig{ 203 skippedProperty: test.noindex, 204 vectorizeClassName: test.excludedClass != "Car", 205 excludedProperty: test.excludedProperty, 206 service: "", 207 region: "", 208 model: "", 209 endpoint: "", 210 targetModel: "", 211 targetVariant: "", 212 vectorizePropertyName: true, 213 } 214 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties) 215 vector, _, err := v.Object(context.Background(), test.input, comp, ic) 216 217 require.Nil(t, err) 218 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 219 expected := strings.Split(test.expectedClientCall, " ") 220 actual := strings.Split(client.lastInput[0], " ") 221 assert.Equal(t, expected, actual) 222 }) 223 } 224 } 225 226 func TestVectorizingObjectsWithDiff(t *testing.T) { 227 type testCase struct { 228 name string 229 input *models.Object 230 skipped string 231 comp moduletools.VectorizablePropsComparator 232 expectedVectorize bool 233 } 234 235 propsSchema := []*models.Property{ 236 { 237 Name: "brand", 238 DataType: schema.DataTypeText.PropString(), 239 }, 240 { 241 Name: "power", 242 DataType: schema.DataTypeInt.PropString(), 243 }, 244 { 245 Name: "description", 246 DataType: schema.DataTypeText.PropString(), 247 }, 248 { 249 Name: "reviews", 250 DataType: schema.DataTypeTextArray.PropString(), 251 }, 252 } 253 props := map[string]interface{}{ 254 "brand": "best brand", 255 "power": 300, 256 "description": "a very great car", 257 "reviews": []string{ 258 "a very great car", 259 "you should consider buying one", 260 }, 261 } 262 vector := []float32{0, 0, 0, 0} 263 var vectors models.Vectors 264 265 tests := []testCase{ 266 { 267 name: "noop comp", 268 input: &models.Object{ 269 Class: "Car", 270 Properties: props, 271 }, 272 comp: moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props), 273 expectedVectorize: true, 274 }, 275 { 276 name: "all props unchanged", 277 input: &models.Object{ 278 Class: "Car", 279 Properties: props, 280 }, 281 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors), 282 expectedVectorize: false, 283 }, 284 { 285 name: "one vectorizable prop changed (1)", 286 input: &models.Object{ 287 Class: "Car", 288 Properties: props, 289 }, 290 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 291 "brand": "old best brand", 292 "power": 300, 293 "description": "a very great car", 294 "reviews": []string{ 295 "a very great car", 296 "you should consider buying one", 297 }, 298 }, vector, vectors), 299 expectedVectorize: true, 300 }, 301 { 302 name: "one vectorizable prop changed (2)", 303 input: &models.Object{ 304 Class: "Car", 305 Properties: props, 306 }, 307 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 308 "brand": "best brand", 309 "power": 300, 310 "description": "old a very great car", 311 "reviews": []string{ 312 "a very great car", 313 "you should consider buying one", 314 }, 315 }, vector, vectors), 316 expectedVectorize: true, 317 }, 318 { 319 name: "one vectorizable prop changed (3)", 320 input: &models.Object{ 321 Class: "Car", 322 Properties: props, 323 }, 324 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 325 "brand": "best brand", 326 "power": 300, 327 "description": "a very great car", 328 "reviews": []string{ 329 "old a very great car", 330 "you should consider buying one", 331 }, 332 }, vector, vectors), 333 expectedVectorize: true, 334 }, 335 { 336 name: "all non-vectorizable props changed", 337 skipped: "description", 338 input: &models.Object{ 339 Class: "Car", 340 Properties: props, 341 }, 342 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 343 "brand": "best brand", 344 "power": 123, 345 "description": "old a very great car", 346 "reviews": []string{ 347 "a very great car", 348 "you should consider buying one", 349 }, 350 }, vector, vectors), 351 expectedVectorize: false, 352 }, 353 } 354 355 for _, test := range tests { 356 t.Run(test.name, func(t *testing.T) { 357 ic := &fakeClassConfig{ 358 skippedProperty: test.skipped, 359 } 360 361 client := &fakeClient{} 362 v := New(client) 363 364 vector, _, err := v.Object(context.Background(), test.input, test.comp, ic) 365 366 require.Nil(t, err) 367 if test.expectedVectorize { 368 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 369 assert.NotEmpty(t, client.lastInput) 370 } else { 371 assert.Equal(t, []float32{0, 0, 0, 0}, vector) 372 assert.Empty(t, client.lastInput) 373 } 374 }) 375 } 376 }