github.com/weaviate/weaviate@v1.24.6/modules/multi2vec-bind/vectorizer/vectorizer_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "testing" 17 18 "github.com/stretchr/testify/assert" 19 "github.com/stretchr/testify/require" 20 "github.com/weaviate/weaviate/entities/models" 21 "github.com/weaviate/weaviate/entities/moduletools" 22 "github.com/weaviate/weaviate/entities/schema" 23 ) 24 25 const image = "iVBORw0KGgoAAAANSUhEUgAAAGAAAAA/CAYAAAAfQM0aAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAyRpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMy1jMDExIDY2LjE0NTY2MSwgMjAxMi8wMi8wNi0xNDo1NjoyNyAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczp4bXBNTT0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wL21tLyIgeG1sbnM6c3RSZWY9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9zVHlwZS9SZXNvdXJjZVJlZiMiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIENTNiAoTWFjaW50b3NoKSIgeG1wTU06SW5zdGFuY2VJRD0ieG1wLmlpZDpCRjQ5NEM3RDI5QTkxMUUyOTc1NENCMzI4N0QwNDNCOSIgeG1wTU06RG9jdW1lbnRJRD0ieG1wLmRpZDpCRjQ5NEM3RTI5QTkxMUUyOTc1NENCMzI4N0QwNDNCOSI+IDx4bXBNTTpEZXJpdmVkRnJvbSBzdFJlZjppbnN0YW5jZUlEPSJ4bXAuaWlkOkJGNDk0QzdCMjlBOTExRTI5NzU0Q0IzMjg3RDA0M0I5IiBzdFJlZjpkb2N1bWVudElEPSJ4bXAuZGlkOkJGNDk0QzdDMjlBOTExRTI5NzU0Q0IzMjg3RDA0M0I5Ii8+IDwvcmRmOkRlc2NyaXB0aW9uPiA8L3JkZjpSREY+IDwveDp4bXBtZXRhPiA8P3hwYWNrZXQgZW5kPSJyIj8+WeGRxAAAB2hJREFUeNrUXFtslUUQ3hJCoQVEKy0k1qQgrRg0vaAJaq1tvJSgaLy8mKDF2IvxBY2Bgm8+iIoxvhB72tTUmKgPigbFKCEtxeKD9hZjAi3GJrYJtqRai7TQB+pMz/zwU/5zzsxe2u4kXwiwZ+bb/Xb/s7v/zEmrra1VTFsFeBRQCtgEuBWwkv5vHPAn4DdAB+B7wBjXcUNDQ8o2dXV1SmDzyhUtLS3tBPyxC9CdrN1ihi/swKuA7YD0BG1uJhQDngdcAnwDeJ86Ole2kLii+J2AFsA+wF9RjRalmEUHaZY8m6RDUYZtn6HPHiRfLm2hck0D7AScAdRH8UokwD2AnwA7UoiUyhaRD/S12dHg+8B1OWA/4BTgqVQCPEJL8haLBNDXEfJt03ziipYH+BJwHFAYJcAWwCeAZQ6CLyPfWyz584nrbCuj74eHwgKsddih2R1ba+jHJ65R1k6PuWNhAd4DZM/BTiWbdhwm5hPXsA0AngY8COgNP4JwSTyu4zE/P18VFhZKP7aNYuouXxFX5Ic8Nc2Ea2D/AfYCNgIORZ0DdusOfnFxcXDwUD09PZKP76alKDUR16KiIlVQUHDl7/39/Uozpg7Xac45YB0dGrQHHw07KVwJpRRbYiKuyCc8+MhXcyXocP2RnvMvJhr8QIBK08EPbGJiQuqq0mX7KD4GIohi4xVPTU0N6/BRamPwu7u7dZb3/RozkW3IB3lZEkGHayeI8FFVVdWaZAIUcD2Wl5fbHHy024XtC6QBkomA/XHIFb8X0Xamp6efASHqt27dGnkVkcNxVlFRoXJycmwOvuLGNmifVATsD/bLZezgKgKE2J+bm3sKHk3XXUWs4Mz87Oxs24OvOLEN26cUAfvFXAkrlKGBCDNXEbAajldXV1+5ijjP+KCrg855x+3nk2uy8SwDdIIIM1cRI6k+0NraqkZGRmzuKAIbFrYf0Q2UaPOA/Wpra3PBNfHhYHq6HbC5qanpGB7ETgPWc0TApTr7eyDolOaj6LRG+/W2Bn94eJg7+DpcowZ+AGb+642NjYfC3wEdXAdI1uK2Du2ksH2HrcHHfggGX4frNVcRMPh7BwcHN8ZiseuuIr4DvKXib29YX2bhmW+wEqYptsREXC2eWXS44oyfuYqYmpra19LSEnkaRgEG6Nj8gGRHESVCRkaG9Kg+IOyTiGtmZqatnZsOV/zMLnjcsF7KH5AIECVCX1+f6u3tlbg4oLmc2VyDy8HgPshg2yzmCo8aFsdAALzpw9dw23REwJkvHPwjSu92UcwVRcAnAd4LaQ6+CVe2AGivAe5WwhcdGp0aoVgmJuIqnBy2uSa18Buxs4AXAJMO401SjLOGfnziyhYg2GrtcNSxSfJ90pI/n7iyBUA7quKv/IYsxhmiZ/ZRy/x94soWAO1nwL0qnhVw2cD/ZfKBvjod9cEnrmwB0DBh9RUVfxHxhYrnUHLtEn2mlHyMOe6HT1wT7oISGSas4ntNzJmsVFczjnMBN1CbfwGD1BYPID8A/lFzbz5xZQsQnmWfExa6ecNVIsBKWuIlgA0qnjG2PLhsou0aZgF3qfil2fg89ssbrhwBNtB+GN/dLUnQ5kbCHYAnAFMAvGpsoY7OlS0krmOhxx7WLHwAeBLwVahN2uIUswgrPB5T8rRv7DxWqDwM+JaCjzue8b5wZe2C7gJ8quKVJqY599vJ1yZHffCJK0uA+wAfAtZYjIO+Gsi3TfOJK0sAfFP/jpKV+HBtKfkutOTPJ64sAVYD3qXgrmwpxVht6McnrmwBMAP4pjlYdRij3tCHT1xZAuDdermOA836gDKKqWNirob1ASZc2eeAl3QH36A+AGP+ohFWxNVSfYAuV9YKyKUTo/bgo2nUB5RQbImJuFqsD9DhyhbAuDgjMI36gFKX7S3XB5S6egSV2Bh8zYyDYjr4SGYi2yzmMIm5YnFGkFOLSQGNjY3X/BtaLBabWQF5XKcO6gOkZT950gAW6wPWuXoEZXEaOqoPyHLcPqkIwvqALFcCZHJmvqP6gEzH7VOKIKgPyHQlwIVUjRzWB1xw3H4+ubIFGE3VyGF9wKjj9ik3D4L6gFFXArCSTlEEzKe3LMIfwvYDNgcf+4P9csSVLUAXt7GD+oBuYfsuW4OvUR/Q7UoA/G2zaRvbOqEI0xRbYiKulusDTrgSYEg6sxKJIKwP6FLyjDYRV4v1ATpc2QKgNZtu6zTqA5o1ObM/h5eDyMvCtrlZObLgNhRv+jAHvkwqQjDzhYPfrvRvF0VcLdQHaHGNxWKrZv0d//hahcqr8Ccww1kRbwPuVMIXHRqd+ptimZiIq0F9gA2urEcQ2jkVf/tz0WG8ixTjnKEfn7iyBQi2WnuULLlV0qE9FrdzPnFlC4CGRQkvqyQ/MqRh6KtO2S948IkrWwC0XwHPAQ4r85z7w+TL1U8Y+8Q14S4oyjA9703AZ4AqFX8RvoTpN8i3/Bi/p+egHz5xZQsQGCasvqGuZhzj76DdpuIZx8FPuOAviWDG8e8qXl0yXxnHPnGdsf8FGAByGwC02iMZswAAAABJRU5ErkJggg==" 26 27 func TestVectorizer(t *testing.T) { 28 t.Run("should vectorize image", func(t *testing.T) { 29 // given 30 client := &fakeClient{} 31 vectorizer := &Vectorizer{client} 32 config := newConfigBuilder().addSetting("imageFields", []interface{}{"image"}).build() 33 34 propsSchema := []*models.Property{ 35 { 36 Name: "image", 37 DataType: schema.DataTypeBlob.PropString(), 38 }, 39 } 40 props := map[string]interface{}{ 41 "image": image, 42 } 43 object := &models.Object{ 44 ID: "some-uuid", 45 Properties: props, 46 } 47 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props) 48 49 // when 50 vector, _, err := vectorizer.Object(context.Background(), object, comp, config) 51 52 // then 53 require.Nil(t, err) 54 assert.NotNil(t, vector) 55 }) 56 57 t.Run("should vectorize 2 image fields", func(t *testing.T) { 58 // given 59 client := &fakeClient{} 60 vectorizer := &Vectorizer{client} 61 config := newConfigBuilder().addSetting("imageFields", []interface{}{"image1", "image2"}).build() 62 63 propsSchema := []*models.Property{ 64 { 65 Name: "image1", 66 DataType: schema.DataTypeBlob.PropString(), 67 }, 68 { 69 Name: "image2", 70 DataType: schema.DataTypeBlob.PropString(), 71 }, 72 } 73 props := map[string]interface{}{ 74 "image1": image, 75 "image2": image, 76 } 77 object := &models.Object{ 78 ID: "some-uuid", 79 Properties: props, 80 } 81 comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props) 82 83 // when 84 vector, _, err := vectorizer.Object(context.Background(), object, comp, config) 85 86 // then 87 require.Nil(t, err) 88 assert.NotNil(t, vector) 89 }) 90 } 91 92 func TestVectorizerWithDiff(t *testing.T) { 93 type testCase struct { 94 name string 95 input *models.Object 96 comp moduletools.VectorizablePropsComparator 97 expectedVectorize bool 98 } 99 100 propsSchema := []*models.Property{ 101 { 102 Name: "image", 103 DataType: schema.DataTypeBlob.PropString(), 104 }, 105 { 106 Name: "text", 107 DataType: schema.DataTypeText.PropString(), 108 }, 109 { 110 Name: "description", 111 DataType: schema.DataTypeText.PropString(), 112 }, 113 } 114 props := map[string]interface{}{ 115 "image": image, 116 "text": "text", 117 "description": "non-vectorizable", 118 } 119 vector := []float32{0, 0, 0, 0, 0} 120 var vectors models.Vectors 121 122 tests := []testCase{ 123 { 124 name: "noop comp", 125 input: &models.Object{ 126 ID: "some-uuid", 127 Properties: props, 128 }, 129 comp: moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props), 130 expectedVectorize: true, 131 }, 132 { 133 name: "all props unchanged", 134 input: &models.Object{ 135 ID: "some-uuid", 136 Properties: props, 137 }, 138 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors), 139 expectedVectorize: false, 140 }, 141 { 142 name: "one vectorizable prop changed (1)", 143 input: &models.Object{ 144 ID: "some-uuid", 145 Properties: props, 146 }, 147 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 148 "image": nil, 149 "text": "text", 150 "description": "non-vectorizable", 151 }, vector, vectors), 152 expectedVectorize: true, 153 }, 154 { 155 name: "one vectorizable prop changed (2)", 156 input: &models.Object{ 157 ID: "some-uuid", 158 Properties: props, 159 }, 160 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 161 "image": image, 162 "text": "old text", 163 "description": "non-vectorizable", 164 }, vector, vectors), 165 expectedVectorize: true, 166 }, 167 { 168 name: "all non-vectorizable props changed", 169 input: &models.Object{ 170 ID: "some-uuid", 171 Properties: props, 172 }, 173 comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{ 174 "image": image, 175 "text": "text", 176 "description": "old non-vectorizable", 177 }, vector, vectors), 178 expectedVectorize: false, 179 }, 180 } 181 182 for _, test := range tests { 183 t.Run(test.name, func(t *testing.T) { 184 client := &fakeClient{} 185 vectorizer := &Vectorizer{client} 186 config := newConfigBuilder(). 187 addSetting("imageFields", []interface{}{"image"}). 188 addSetting("textFields", []interface{}{"text"}). 189 build() 190 191 vector, _, err := vectorizer.Object(context.Background(), test.input, test.comp, config) 192 193 require.Nil(t, err) 194 if test.expectedVectorize { 195 assert.Equal(t, []float32{5.5, 11, 16.5, 22, 27.5}, vector) 196 } else { 197 assert.Equal(t, []float32{0, 0, 0, 0, 0}, vector) 198 } 199 }) 200 } 201 } 202 203 func TestVectorizer_normalizeWeights(t *testing.T) { 204 tests := []struct { 205 name string 206 weights []float32 207 }{ 208 { 209 name: "normalize example 1", 210 weights: []float32{200, 100, 0.1}, 211 }, 212 { 213 name: "normalize example 2", 214 weights: []float32{300.22, 0.7, 17, 54}, 215 }, 216 { 217 name: "normalize example 3", 218 weights: []float32{300, 0.02, 17}, 219 }, 220 { 221 name: "normalize example 4", 222 weights: []float32{500, 0.02, 17.4, 180}, 223 }, 224 { 225 name: "normalize example 5", 226 weights: []float32{500, 0.02, 17.4, 2, 4, 5, .88}, 227 }, 228 } 229 for _, tt := range tests { 230 t.Run(tt.name, func(t *testing.T) { 231 v := &Vectorizer{} 232 if got := v.normalizeWeights(tt.weights); !checkNormalization(got) { 233 t.Errorf("Vectorizer.normalizeWeights() = %v, want %v", got, 1.0) 234 } 235 }) 236 } 237 } 238 239 func checkNormalization(weights []float32) bool { 240 var result float32 241 for i := range weights { 242 result += weights[i] 243 } 244 return result == 1.0 245 }