github.com/weaviate/weaviate@v1.24.6/modules/text2vec-cohere/vectorizer/objects_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "strings" 17 "testing" 18 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 "github.com/weaviate/weaviate/entities/models" 22 "github.com/weaviate/weaviate/entities/moduletools" 23 "github.com/weaviate/weaviate/entities/schema" 24 ) 25 26 // These are mostly copy/pasted (with minimal additions) from the 27 // text2vec-contextionary module 28 func TestVectorizingObjects(t *testing.T) { 29 type testCase struct { 30 name string 31 input *models.Object 32 expectedClientCall string 33 expectedCohereModel string 34 noindex string 35 excludedProperty string // to simulate a schema where property names aren't vectorized 36 excludedClass string // to simulate a schema where class names aren't vectorized 37 cohereModel string 38 } 39 40 propsSchema := []*models.Property{ 41 { 42 Name: "brand", 43 DataType: schema.DataTypeText.PropString(), 44 }, 45 { 46 Name: "power", 47 DataType: schema.DataTypeInt.PropString(), 48 }, 49 { 50 Name: "review", 51 DataType: schema.DataTypeText.PropString(), 52 }, 53 { 54 Name: "brandOfTheCar", 55 DataType: schema.DataTypeText.PropString(), 56 }, 57 { 58 Name: "reviews", 59 DataType: schema.DataTypeTextArray.PropString(), 60 }, 61 } 62 63 tests := []testCase{ 64 { 65 name: "empty object", 66 input: &models.Object{ 67 Class: "Car", 68 }, 69 cohereModel: "large", 70 expectedCohereModel: "large", 71 expectedClientCall: "car", 72 }, 73 { 74 name: "object with one string prop", 75 input: &models.Object{ 76 Class: "Car", 77 Properties: map[string]interface{}{ 78 "brand": "Mercedes", 79 }, 80 }, 81 expectedClientCall: "car brand mercedes", 82 }, 83 { 84 name: "object with one non-string prop", 85 input: &models.Object{ 86 Class: "Car", 87 Properties: map[string]interface{}{ 88 "power": 300, 89 }, 90 }, 91 expectedClientCall: "car", 92 }, 93 { 94 name: "object with a mix of props", 95 input: &models.Object{ 96 Class: "Car", 97 Properties: map[string]interface{}{ 98 "brand": "best brand", 99 "power": 300, 100 "review": "a very great car", 101 }, 102 }, 103 expectedClientCall: "car brand best brand review a very great car", 104 }, 105 { 106 name: "with a noindexed property", 107 noindex: "review", 108 input: &models.Object{ 109 Class: "Car", 110 Properties: map[string]interface{}{ 111 "brand": "best brand", 112 "power": 300, 113 "review": "a very great car", 114 }, 115 }, 116 expectedClientCall: "car brand best brand", 117 }, 118 { 119 name: "with the class name not vectorized", 120 excludedClass: "Car", 121 input: &models.Object{ 122 Class: "Car", 123 Properties: map[string]interface{}{ 124 "brand": "best brand", 125 "power": 300, 126 "review": "a very great car", 127 }, 128 }, 129 expectedClientCall: "brand best brand review a very great car", 130 }, 131 { 132 name: "with a property name not vectorized", 133 excludedProperty: "review", 134 input: &models.Object{ 135 Class: "Car", 136 Properties: map[string]interface{}{ 137 "brand": "best brand", 138 "power": 300, 139 "review": "a very great car", 140 }, 141 }, 142 expectedClientCall: "car brand best brand a very great car", 143 }, 144 { 145 name: "with no schema labels vectorized", 146 excludedProperty: "review", 147 excludedClass: "Car", 148 input: &models.Object{ 149 Class: "Car", 150 Properties: map[string]interface{}{ 151 "review": "a very great car", 152 }, 153 }, 154 expectedClientCall: "a very great car", 155 }, 156 { 157 name: "with string/text arrays without propname or classname", 158 excludedProperty: "reviews", 159 excludedClass: "Car", 160 input: &models.Object{ 161 Class: "Car", 162 Properties: map[string]interface{}{ 163 "reviews": []string{ 164 "a very great car", 165 "you should consider buying one", 166 }, 167 }, 168 }, 169 expectedClientCall: "a very great car you should consider buying one", 170 }, 171 { 172 name: "with string/text arrays with propname and classname", 173 input: &models.Object{ 174 Class: "Car", 175 Properties: map[string]interface{}{ 176 "reviews": []string{ 177 "a very great car", 178 "you should consider buying one", 179 }, 180 }, 181 }, 182 expectedClientCall: "car reviews a very great car reviews you should consider buying one", 183 }, 184 { 185 name: "with compound class and prop names", 186 input: &models.Object{ 187 Class: "SuperCar", 188 Properties: map[string]interface{}{ 189 "brandOfTheCar": "best brand", 190 "power": 300, 191 "review": "a very great car", 192 }, 193 }, 194 expectedClientCall: "super car brand of the car best brand review a very great car", 195 }, 196 } 197 198 for _, test := range tests { 199 t.Run(test.name, func(t *testing.T) { 200 client := &fakeClient{} 201 202 v := New(client) 203 204 ic := &fakeClassConfig{ 205 excludedProperty: test.excludedProperty, 206 skippedProperty: test.noindex, 207 vectorizeClassName: test.excludedClass != "Car", 208 cohereModel: test.cohereModel, 209 vectorizePropertyName: true, 210 } 211 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties) 212 vector, _, err := v.Object(context.Background(), test.input, comp, ic) 213 214 require.Nil(t, err) 215 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 216 expected := strings.Split(test.expectedClientCall, " ") 217 actual := strings.Split(client.lastInput[0], " ") 218 assert.Equal(t, expected, actual) 219 assert.Equal(t, test.expectedCohereModel, client.lastConfig.Model) 220 }) 221 } 222 } 223 224 func TestVectorizingObjectsWithDiff(t *testing.T) { 225 type testCase struct { 226 name string 227 input *models.Object 228 skipped string 229 comp moduletools.VectorizablePropsComparator 230 expectedVectorize bool 231 } 232 233 propsSchema := []*models.Property{ 234 { 235 Name: "brand", 236 DataType: schema.DataTypeText.PropString(), 237 }, 238 { 239 Name: "power", 240 DataType: schema.DataTypeInt.PropString(), 241 }, 242 { 243 Name: "description", 244 DataType: schema.DataTypeText.PropString(), 245 }, 246 { 247 Name: "reviews", 248 DataType: schema.DataTypeTextArray.PropString(), 249 }, 250 } 251 props := map[string]interface{}{ 252 "brand": "best brand", 253 "power": 300, 254 "description": "a very great car", 255 "reviews": []string{ 256 "a very great car", 257 "you should consider buying one", 258 }, 259 } 260 vector := []float32{0, 0, 0, 0} 261 var vectors models.Vectors 262 263 tests := []testCase{ 264 { 265 name: "noop comp", 266 input: &models.Object{ 267 Class: "Car", 268 Properties: props, 269 }, 270 comp: moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props), 271 expectedVectorize: true, 272 }, 273 { 274 name: "all props unchanged", 275 input: &models.Object{ 276 Class: "Car", 277 Properties: props, 278 }, 279 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors), 280 expectedVectorize: false, 281 }, 282 { 283 name: "diff one vectorizable prop changed (1)", 284 input: &models.Object{ 285 Class: "Car", 286 Properties: props, 287 }, 288 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 289 "brand": "old best brand", 290 "power": 300, 291 "description": "a very great car", 292 "reviews": []string{ 293 "a very great car", 294 "you should consider buying one", 295 }, 296 }, vector, vectors), 297 expectedVectorize: true, 298 }, 299 { 300 name: "one vectorizable prop changed (2)", 301 input: &models.Object{ 302 Class: "Car", 303 Properties: props, 304 }, 305 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 306 "brand": "best brand", 307 "power": 300, 308 "description": "old a very great car", 309 "reviews": []string{ 310 "a very great car", 311 "you should consider buying one", 312 }, 313 }, vector, vectors), 314 expectedVectorize: true, 315 }, 316 { 317 name: "one vectorizable prop changed (3)", 318 input: &models.Object{ 319 Class: "Car", 320 Properties: props, 321 }, 322 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 323 "brand": "best brand", 324 "power": 300, 325 "description": "a very great car", 326 "reviews": []string{ 327 "old a very great car", 328 "you should consider buying one", 329 }, 330 }, vector, vectors), 331 expectedVectorize: true, 332 }, 333 { 334 name: "all non-vectorizable props changed", 335 skipped: "description", 336 input: &models.Object{ 337 Class: "Car", 338 Properties: props, 339 }, 340 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 341 "brand": "best brand", 342 "power": 123, 343 "description": "old a very great car", 344 "reviews": []string{ 345 "a very great car", 346 "you should consider buying one", 347 }, 348 }, vector, vectors), 349 expectedVectorize: false, 350 }, 351 } 352 353 for _, test := range tests { 354 t.Run(test.name, func(t *testing.T) { 355 ic := &fakeClassConfig{ 356 skippedProperty: test.skipped, 357 } 358 359 client := &fakeClient{} 360 v := New(client) 361 362 vector, _, err := v.Object(context.Background(), test.input, test.comp, ic) 363 364 require.Nil(t, err) 365 if test.expectedVectorize { 366 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 367 assert.NotEmpty(t, client.lastInput) 368 } else { 369 assert.Equal(t, []float32{0, 0, 0, 0}, vector) 370 assert.Empty(t, client.lastInput) 371 } 372 }) 373 } 374 }