github.com/weaviate/weaviate@v1.24.6/modules/text2vec-palm/vectorizer/objects_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "strings" 17 "testing" 18 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 "github.com/weaviate/weaviate/entities/models" 22 "github.com/weaviate/weaviate/entities/moduletools" 23 "github.com/weaviate/weaviate/entities/schema" 24 ) 25 26 // These are mostly copy/pasted (with minimal additions) from the 27 // text2vec-contextionary module 28 func TestVectorizingObjects(t *testing.T) { 29 type testCase struct { 30 name string 31 input *models.Object 32 expectedClientCall string 33 noindex string 34 excludedProperty string // to simulate a schema where property names aren't vectorized 35 excludedClass string // to simulate a schema where class names aren't vectorized 36 palmModel string 37 } 38 39 propsSchema := []*models.Property{ 40 { 41 Name: "brand", 42 DataType: schema.DataTypeText.PropString(), 43 }, 44 { 45 Name: "power", 46 DataType: schema.DataTypeInt.PropString(), 47 }, 48 { 49 Name: "review", 50 DataType: schema.DataTypeText.PropString(), 51 }, 52 { 53 Name: "brandOfTheCar", 54 DataType: schema.DataTypeText.PropString(), 55 }, 56 { 57 Name: "reviews", 58 DataType: schema.DataTypeTextArray.PropString(), 59 }, 60 } 61 62 tests := []testCase{ 63 { 64 name: "empty object", 65 input: &models.Object{ 66 Class: "Car", 67 }, 68 palmModel: "large", 69 expectedClientCall: "car", 70 }, 71 { 72 name: "object with one string prop", 73 input: &models.Object{ 74 Class: "Car", 75 Properties: map[string]interface{}{ 76 "brand": "Mercedes", 77 }, 78 }, 79 expectedClientCall: "car brand mercedes", 80 }, 81 { 82 name: "object with one non-string prop", 83 input: &models.Object{ 84 Class: "Car", 85 Properties: map[string]interface{}{ 86 "power": 300, 87 }, 88 }, 89 expectedClientCall: "car", 90 }, 91 { 92 name: "object with a mix of props", 93 input: &models.Object{ 94 Class: "Car", 95 Properties: map[string]interface{}{ 96 "brand": "best brand", 97 "power": 300, 98 "review": "a very great car", 99 }, 100 }, 101 expectedClientCall: "car brand best brand review a very great car", 102 }, 103 { 104 name: "with a noindexed property", 105 noindex: "review", 106 input: &models.Object{ 107 Class: "Car", 108 Properties: map[string]interface{}{ 109 "brand": "best brand", 110 "power": 300, 111 "review": "a very great car", 112 }, 113 }, 114 expectedClientCall: "car brand best brand", 115 }, 116 { 117 name: "with the class name not vectorized", 118 excludedClass: "Car", 119 input: &models.Object{ 120 Class: "Car", 121 Properties: map[string]interface{}{ 122 "brand": "best brand", 123 "power": 300, 124 "review": "a very great car", 125 }, 126 }, 127 expectedClientCall: "brand best brand review a very great car", 128 }, 129 { 130 name: "with a property name not vectorized", 131 excludedProperty: "review", 132 input: &models.Object{ 133 Class: "Car", 134 Properties: map[string]interface{}{ 135 "brand": "best brand", 136 "power": 300, 137 "review": "a very great car", 138 }, 139 }, 140 expectedClientCall: "car brand best brand a very great car", 141 }, 142 { 143 name: "with no schema labels vectorized", 144 excludedProperty: "review", 145 excludedClass: "Car", 146 input: &models.Object{ 147 Class: "Car", 148 Properties: map[string]interface{}{ 149 "review": "a very great car", 150 }, 151 }, 152 expectedClientCall: "a very great car", 153 }, 154 { 155 name: "with string/text arrays without propname or classname", 156 excludedProperty: "reviews", 157 excludedClass: "Car", 158 input: &models.Object{ 159 Class: "Car", 160 Properties: map[string]interface{}{ 161 "reviews": []string{ 162 "a very great car", 163 "you should consider buying one", 164 }, 165 }, 166 }, 167 expectedClientCall: "a very great car you should consider buying one", 168 }, 169 { 170 name: "with string/text arrays with propname and classname", 171 input: &models.Object{ 172 Class: "Car", 173 Properties: map[string]interface{}{ 174 "reviews": []string{ 175 "a very great car", 176 "you should consider buying one", 177 }, 178 }, 179 }, 180 expectedClientCall: "car reviews a very great car reviews you should consider buying one", 181 }, 182 { 183 name: "with compound class and prop names", 184 input: &models.Object{ 185 Class: "SuperCar", 186 Properties: map[string]interface{}{ 187 "brandOfTheCar": "best brand", 188 "power": 300, 189 "review": "a very great car", 190 }, 191 }, 192 expectedClientCall: "super car brand of the car best brand review a very great car", 193 }, 194 } 195 196 for _, test := range tests { 197 t.Run(test.name, func(t *testing.T) { 198 client := &fakeClient{} 199 200 v := New(client) 201 202 ic := &fakeClassConfig{ 203 skippedProperty: test.noindex, 204 vectorizeClassName: test.excludedClass != "Car", 205 excludedProperty: test.excludedProperty, 206 vectorizePropertyName: true, 207 apiEndpoint: "", 208 projectID: "", 209 endpointID: "", 210 modelID: "", 211 } 212 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties) 213 vector, _, err := v.Object(context.Background(), test.input, comp, ic) 214 215 require.Nil(t, err) 216 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 217 expected := strings.Split(test.expectedClientCall, " ") 218 actual := strings.Split(client.lastInput[0], " ") 219 assert.Equal(t, expected, actual) 220 }) 221 } 222 } 223 224 func TestVectorizingObjectsWithDiff(t *testing.T) { 225 type testCase struct { 226 name string 227 input *models.Object 228 skipped string 229 comp moduletools.VectorizablePropsComparator 230 expectedVectorize bool 231 } 232 233 propsSchema := []*models.Property{ 234 { 235 Name: "brand", 236 DataType: schema.DataTypeText.PropString(), 237 }, 238 { 239 Name: "power", 240 DataType: schema.DataTypeInt.PropString(), 241 }, 242 { 243 Name: "description", 244 DataType: schema.DataTypeText.PropString(), 245 }, 246 { 247 Name: "reviews", 248 DataType: schema.DataTypeTextArray.PropString(), 249 }, 250 } 251 props := map[string]interface{}{ 252 "brand": "best brand", 253 "power": 300, 254 "description": "a very great car", 255 "reviews": []string{ 256 "a very great car", 257 "you should consider buying one", 258 }, 259 } 260 vector := []float32{0, 0, 0, 0} 261 var vectors models.Vectors 262 263 tests := []testCase{ 264 { 265 name: "noop comp", 266 input: &models.Object{ 267 Class: "Car", 268 Properties: props, 269 }, 270 comp: moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props), 271 expectedVectorize: true, 272 }, 273 { 274 name: "all props unchanged", 275 input: &models.Object{ 276 Class: "Car", 277 Properties: props, 278 }, 279 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors), 280 expectedVectorize: false, 281 }, 282 { 283 name: "one vectorizable prop changed (1)", 284 input: &models.Object{ 285 Class: "Car", 286 Properties: props, 287 }, 288 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 289 "brand": "old best brand", 290 "power": 300, 291 "description": "a very great car", 292 "reviews": []string{ 293 "a very great car", 294 "you should consider buying one", 295 }, 296 }, vector, vectors), 297 expectedVectorize: true, 298 }, 299 { 300 name: "diff one vectorizable prop changed (2)", 301 input: &models.Object{ 302 Class: "Car", 303 Properties: props, 304 }, 305 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 306 "brand": "best brand", 307 "power": 300, 308 "description": "old a very great car", 309 "reviews": []string{ 310 "a very great car", 311 "you should consider buying one", 312 }, 313 }, vector, vectors), 314 expectedVectorize: true, 315 }, 316 { 317 name: "one vectorizable prop changed (3)", 318 input: &models.Object{ 319 Class: "Car", 320 Properties: props, 321 }, 322 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 323 "brand": "best brand", 324 "power": 300, 325 "description": "a very great car", 326 "reviews": []string{ 327 "old a very great car", 328 "you should consider buying one", 329 }, 330 }, vector, vectors), 331 expectedVectorize: true, 332 }, 333 { 334 name: "all non-vectorizable props changed", 335 skipped: "description", 336 input: &models.Object{ 337 Class: "Car", 338 Properties: props, 339 }, 340 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 341 "brand": "best brand", 342 "power": 123, 343 "description": "old a very great car", 344 "reviews": []string{ 345 "a very great car", 346 "you should consider buying one", 347 }, 348 }, vector, vectors), 349 expectedVectorize: false, 350 }, 351 } 352 353 for _, test := range tests { 354 t.Run(test.name, func(t *testing.T) { 355 ic := &fakeClassConfig{ 356 skippedProperty: test.skipped, 357 } 358 359 client := &fakeClient{} 360 v := New(client) 361 362 vector, _, err := v.Object(context.Background(), test.input, test.comp, ic) 363 364 require.Nil(t, err) 365 if test.expectedVectorize { 366 assert.Equal(t, []float32{0, 1, 2, 3}, vector) 367 assert.NotEmpty(t, client.lastInput) 368 } else { 369 assert.Equal(t, []float32{0, 0, 0, 0}, vector) 370 assert.Empty(t, client.lastInput) 371 } 372 }) 373 } 374 }