github.com/weaviate/weaviate@v1.24.6/modules/multi2vec-bind/vectorizer/vectorizer_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"testing"
    17  
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/stretchr/testify/require"
    20  	"github.com/weaviate/weaviate/entities/models"
    21  	"github.com/weaviate/weaviate/entities/moduletools"
    22  	"github.com/weaviate/weaviate/entities/schema"
    23  )
    24  
    25  const image = "iVBORw0KGgoAAAANSUhEUgAAAGAAAAA/CAYAAAAfQM0aAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAyRpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMy1jMDExIDY2LjE0NTY2MSwgMjAxMi8wMi8wNi0xNDo1NjoyNyAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczp4bXBNTT0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wL21tLyIgeG1sbnM6c3RSZWY9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9zVHlwZS9SZXNvdXJjZVJlZiMiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIENTNiAoTWFjaW50b3NoKSIgeG1wTU06SW5zdGFuY2VJRD0ieG1wLmlpZDpCRjQ5NEM3RDI5QTkxMUUyOTc1NENCMzI4N0QwNDNCOSIgeG1wTU06RG9jdW1lbnRJRD0ieG1wLmRpZDpCRjQ5NEM3RTI5QTkxMUUyOTc1NENCMzI4N0QwNDNCOSI+IDx4bXBNTTpEZXJpdmVkRnJvbSBzdFJlZjppbnN0YW5jZUlEPSJ4bXAuaWlkOkJGNDk0QzdCMjlBOTExRTI5NzU0Q0IzMjg3RDA0M0I5IiBzdFJlZjpkb2N1bWVudElEPSJ4bXAuZGlkOkJGNDk0QzdDMjlBOTExRTI5NzU0Q0IzMjg3RDA0M0I5Ii8+IDwvcmRmOkRlc2NyaXB0aW9uPiA8L3JkZjpSREY+IDwveDp4bXBtZXRhPiA8P3hwYWNrZXQgZW5kPSJyIj8+WeGRxAAAB2hJREFUeNrUXFtslUUQ3hJCoQVEKy0k1qQgrRg0vaAJaq1tvJSgaLy8mKDF2IvxBY2Bgm8+iIoxvhB72tTUmKgPigbFKCEtxeKD9hZjAi3GJrYJtqRai7TQB+pMz/zwU/5zzsxe2u4kXwiwZ+bb/Xb/s7v/zEmrra1VTFsFeBRQCtgEuBWwkv5vHPAn4DdAB+B7wBjXcUNDQ8o2dXV1SmDzyhUtLS3tBPyxC9CdrN1ihi/swKuA7YD0BG1uJhQDngdcAnwDeJ86Ole2kLii+J2AFsA+wF9RjRalmEUHaZY8m6RDUYZtn6HPHiRfLm2hck0D7AScAdRH8UokwD2AnwA7UoiUyhaRD/S12dHg+8B1OWA/4BTgqVQCPEJL8haLBNDXEfJt03ziipYH+BJwHFAYJcAWwCeAZQ6CLyPfWyz584nrbCuj74eHwgKsddih2R1ba+jHJ65R1k6PuWNhAd4DZM/BTiWbdhwm5hPXsA0AngY8COgNP4JwSTyu4zE/P18VFhZKP7aNYuouXxFX5Ic8Nc2Ea2D/AfYCNgIORZ0DdusOfnFxcXDwUD09PZKP76alKDUR16KiIlVQUHDl7/39/Uozpg7Xac45YB0dGrQHHw07KVwJpRRbYiKuyCc8+MhXcyXocP2RnvMvJhr8QIBK08EPbGJiQuqq0mX7KD4GIohi4xVPTU0N6/BRamPwu7u7dZb3/RozkW3IB3lZEkGHayeI8FFVVdWaZAIUcD2Wl5fbHHy024XtC6QBkomA/XHIFb8X0Xamp6efASHqt27dGnkVkcNxVlFRoXJycmwOvuLGNmifVATsD/bLZezgKgKE2J+bm3sKHk3XXUWs4Mz87Oxs24OvOLEN26cUAfvFXAkrlKGBCDNXEbAajldXV1+5ijjP+KCrg855x+3nk2uy8SwDdIIIM1cRI6k+0NraqkZGRmzuKAIbFrYf0Q2UaPOA/Wpra3PBNfHhYHq6HbC5qanpGB7ETgPWc0TApTr7eyDolOaj6LRG+/W2Bn94eJg7+DpcowZ+AGb+642NjYfC3wEdXAdI1uK2Du2ksH2HrcHHfggGX4frNVcRMPh7BwcHN8ZiseuuIr4DvKXib29YX2bhmW+wEqYptsREXC2eWXS44oyfuYqYmpra19LSEnkaRgEG6Nj8gGRHESVCRkaG9Kg+IOyTiGtmZqatnZsOV/zMLnjcsF7KH5AIECVCX1+f6u3tlbg4oLmc2VyDy8HgPshg2yzmCo8aFsdAALzpw9dw23REwJkvHPwjSu92UcwVRcAnAd4LaQ6+CVe2AGivAe5WwhcdGp0aoVgmJuIqnBy2uSa18Buxs4AXAJMO401SjLOGfnziyhYg2GrtcNSxSfJ90pI/n7iyBUA7quKv/IYsxhmiZ/ZRy/x94soWAO1nwL0qnhVw2cD/ZfKBvjod9cEnrmwB0DBh9RUVfxHxhYrnUHLtEn2mlHyMOe6HT1wT7oISGSas4ntNzJmsVFczjnMBN1CbfwGD1BYPID8A/lFzbz5xZQsQnmWfExa6ecNVIsBKWuIlgA0qnjG2PLhsou0aZgF3qfil2fg89ssbrhwBNtB+GN/dLUnQ5kbCHYAnAFMAvGpsoY7OlS0krmOhxx7WLHwAeBLwVahN2uIUswgrPB5T8rRv7DxWqDwM+JaCjzue8b5wZe2C7gJ8quKVJqY599vJ1yZHffCJK0uA+wAfAtZYjIO+Gsi3TfOJK0sAfFP/jpKV+HBtKfkutOTPJ64sAVYD3qXgrmwpxVht6McnrmwBMAP4pjlYdRij3tCHT1xZAuDdermOA836gDKKqWNirob1ASZc2eeAl3QH36A+AGP+ohFWxNVSfYAuV9YKyKUTo/bgo2nUB5RQbImJuFqsD9DhyhbAuDgjMI36gFKX7S3XB5S6egSV2Bh8zYyDYjr4SGYi2yzmMIm5YnFGkFOLSQGNjY3X/BtaLBabWQF5XKcO6gOkZT950gAW6wPWuXoEZXEaOqoPyHLcPqkIwvqALFcCZHJmvqP6gEzH7VOKIKgPyHQlwIVUjRzWB1xw3H4+ubIFGE3VyGF9wKjj9ik3D4L6gFFXArCSTlEEzKe3LMIfwvYDNgcf+4P9csSVLUAXt7GD+oBuYfsuW4OvUR/Q7UoA/G2zaRvbOqEI0xRbYiKulusDTrgSYEg6sxKJIKwP6FLyjDYRV4v1ATpc2QKgNZtu6zTqA5o1ObM/h5eDyMvCtrlZObLgNhRv+jAHvkwqQjDzhYPfrvRvF0VcLdQHaHGNxWKrZv0d//hahcqr8Ccww1kRbwPuVMIXHRqd+ptimZiIq0F9gA2urEcQ2jkVf/tz0WG8ixTjnKEfn7iyBQi2WnuULLlV0qE9FrdzPnFlC4CGRQkvqyQ/MqRh6KtO2S948IkrWwC0XwHPAQ4r85z7w+TL1U8Y+8Q14S4oyjA9703AZ4AqFX8RvoTpN8i3/Bi/p+egHz5xZQsQGCasvqGuZhzj76DdpuIZx8FPuOAviWDG8e8qXl0yXxnHPnGdsf8FGAByGwC02iMZswAAAABJRU5ErkJggg=="
    26  
    27  func TestVectorizer(t *testing.T) {
    28  	t.Run("should vectorize image", func(t *testing.T) {
    29  		// given
    30  		client := &fakeClient{}
    31  		vectorizer := &Vectorizer{client}
    32  		config := newConfigBuilder().addSetting("imageFields", []interface{}{"image"}).build()
    33  
    34  		propsSchema := []*models.Property{
    35  			{
    36  				Name:     "image",
    37  				DataType: schema.DataTypeBlob.PropString(),
    38  			},
    39  		}
    40  		props := map[string]interface{}{
    41  			"image": image,
    42  		}
    43  		object := &models.Object{
    44  			ID:         "some-uuid",
    45  			Properties: props,
    46  		}
    47  		comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props)
    48  
    49  		// when
    50  		vector, _, err := vectorizer.Object(context.Background(), object, comp, config)
    51  
    52  		// then
    53  		require.Nil(t, err)
    54  		assert.NotNil(t, vector)
    55  	})
    56  
    57  	t.Run("should vectorize 2 image fields", func(t *testing.T) {
    58  		// given
    59  		client := &fakeClient{}
    60  		vectorizer := &Vectorizer{client}
    61  		config := newConfigBuilder().addSetting("imageFields", []interface{}{"image1", "image2"}).build()
    62  
    63  		propsSchema := []*models.Property{
    64  			{
    65  				Name:     "image1",
    66  				DataType: schema.DataTypeBlob.PropString(),
    67  			},
    68  			{
    69  				Name:     "image2",
    70  				DataType: schema.DataTypeBlob.PropString(),
    71  			},
    72  		}
    73  		props := map[string]interface{}{
    74  			"image1": image,
    75  			"image2": image,
    76  		}
    77  		object := &models.Object{
    78  			ID:         "some-uuid",
    79  			Properties: props,
    80  		}
    81  		comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props)
    82  
    83  		// when
    84  		vector, _, err := vectorizer.Object(context.Background(), object, comp, config)
    85  
    86  		// then
    87  		require.Nil(t, err)
    88  		assert.NotNil(t, vector)
    89  	})
    90  }
    91  
    92  func TestVectorizerWithDiff(t *testing.T) {
    93  	type testCase struct {
    94  		name              string
    95  		input             *models.Object
    96  		comp              moduletools.VectorizablePropsComparator
    97  		expectedVectorize bool
    98  	}
    99  
   100  	propsSchema := []*models.Property{
   101  		{
   102  			Name:     "image",
   103  			DataType: schema.DataTypeBlob.PropString(),
   104  		},
   105  		{
   106  			Name:     "text",
   107  			DataType: schema.DataTypeText.PropString(),
   108  		},
   109  		{
   110  			Name:     "description",
   111  			DataType: schema.DataTypeText.PropString(),
   112  		},
   113  	}
   114  	props := map[string]interface{}{
   115  		"image":       image,
   116  		"text":        "text",
   117  		"description": "non-vectorizable",
   118  	}
   119  	vector := []float32{0, 0, 0, 0, 0}
   120  	var vectors models.Vectors
   121  
   122  	tests := []testCase{
   123  		{
   124  			name: "noop comp",
   125  			input: &models.Object{
   126  				ID:         "some-uuid",
   127  				Properties: props,
   128  			},
   129  			comp:              moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props),
   130  			expectedVectorize: true,
   131  		},
   132  		{
   133  			name: "all props unchanged",
   134  			input: &models.Object{
   135  				ID:         "some-uuid",
   136  				Properties: props,
   137  			},
   138  			comp:              moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors),
   139  			expectedVectorize: false,
   140  		},
   141  		{
   142  			name: "one vectorizable prop changed (1)",
   143  			input: &models.Object{
   144  				ID:         "some-uuid",
   145  				Properties: props,
   146  			},
   147  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   148  				"image":       nil,
   149  				"text":        "text",
   150  				"description": "non-vectorizable",
   151  			}, vector, vectors),
   152  			expectedVectorize: true,
   153  		},
   154  		{
   155  			name: "one vectorizable prop changed (2)",
   156  			input: &models.Object{
   157  				ID:         "some-uuid",
   158  				Properties: props,
   159  			},
   160  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   161  				"image":       image,
   162  				"text":        "old text",
   163  				"description": "non-vectorizable",
   164  			}, vector, vectors),
   165  			expectedVectorize: true,
   166  		},
   167  		{
   168  			name: "all non-vectorizable props changed",
   169  			input: &models.Object{
   170  				ID:         "some-uuid",
   171  				Properties: props,
   172  			},
   173  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   174  				"image":       image,
   175  				"text":        "text",
   176  				"description": "old non-vectorizable",
   177  			}, vector, vectors),
   178  			expectedVectorize: false,
   179  		},
   180  	}
   181  
   182  	for _, test := range tests {
   183  		t.Run(test.name, func(t *testing.T) {
   184  			client := &fakeClient{}
   185  			vectorizer := &Vectorizer{client}
   186  			config := newConfigBuilder().
   187  				addSetting("imageFields", []interface{}{"image"}).
   188  				addSetting("textFields", []interface{}{"text"}).
   189  				build()
   190  
   191  			vector, _, err := vectorizer.Object(context.Background(), test.input, test.comp, config)
   192  
   193  			require.Nil(t, err)
   194  			if test.expectedVectorize {
   195  				assert.Equal(t, []float32{5.5, 11, 16.5, 22, 27.5}, vector)
   196  			} else {
   197  				assert.Equal(t, []float32{0, 0, 0, 0, 0}, vector)
   198  			}
   199  		})
   200  	}
   201  }
   202  
   203  func TestVectorizer_normalizeWeights(t *testing.T) {
   204  	tests := []struct {
   205  		name    string
   206  		weights []float32
   207  	}{
   208  		{
   209  			name:    "normalize example 1",
   210  			weights: []float32{200, 100, 0.1},
   211  		},
   212  		{
   213  			name:    "normalize example 2",
   214  			weights: []float32{300.22, 0.7, 17, 54},
   215  		},
   216  		{
   217  			name:    "normalize example 3",
   218  			weights: []float32{300, 0.02, 17},
   219  		},
   220  		{
   221  			name:    "normalize example 4",
   222  			weights: []float32{500, 0.02, 17.4, 180},
   223  		},
   224  		{
   225  			name:    "normalize example 5",
   226  			weights: []float32{500, 0.02, 17.4, 2, 4, 5, .88},
   227  		},
   228  	}
   229  	for _, tt := range tests {
   230  		t.Run(tt.name, func(t *testing.T) {
   231  			v := &Vectorizer{}
   232  			if got := v.normalizeWeights(tt.weights); !checkNormalization(got) {
   233  				t.Errorf("Vectorizer.normalizeWeights() = %v, want %v", got, 1.0)
   234  			}
   235  		})
   236  	}
   237  }
   238  
   239  func checkNormalization(weights []float32) bool {
   240  	var result float32
   241  	for i := range weights {
   242  		result += weights[i]
   243  	}
   244  	return result == 1.0
   245  }