github.com/weaviate/weaviate@v1.24.6/modules/text2vec-jinaai/vectorizer/objects_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"testing"
    17  
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/stretchr/testify/require"
    20  	"github.com/weaviate/weaviate/entities/models"
    21  	"github.com/weaviate/weaviate/entities/moduletools"
    22  	"github.com/weaviate/weaviate/entities/schema"
    23  )
    24  
    25  // These are mostly copy/pasted (with minimal additions) from the
    26  // text2vec-contextionary module
    27  func TestVectorizingObjects(t *testing.T) {
    28  	type testCase struct {
    29  		name                string
    30  		input               *models.Object
    31  		expectedClientCall  string
    32  		expectedJinaAIModel string
    33  		noindex             string
    34  		excludedProperty    string // to simulate a schema where property names aren't vectorized
    35  		excludedClass       string // to simulate a schema where class names aren't vectorized
    36  		jinaAIModel         string
    37  	}
    38  
    39  	propsSchema := []*models.Property{
    40  		{
    41  			Name:     "brand",
    42  			DataType: schema.DataTypeText.PropString(),
    43  		},
    44  		{
    45  			Name:     "power",
    46  			DataType: schema.DataTypeInt.PropString(),
    47  		},
    48  		{
    49  			Name:     "review",
    50  			DataType: schema.DataTypeText.PropString(),
    51  		},
    52  		{
    53  			Name:     "brandOfTheCar",
    54  			DataType: schema.DataTypeText.PropString(),
    55  		},
    56  		{
    57  			Name:     "reviews",
    58  			DataType: schema.DataTypeTextArray.PropString(),
    59  		},
    60  	}
    61  
    62  	tests := []testCase{
    63  		{
    64  			name: "empty object",
    65  			input: &models.Object{
    66  				Class: "Car",
    67  			},
    68  			jinaAIModel:         "jina-embedding-v2",
    69  			expectedJinaAIModel: "jina-embedding-v2",
    70  			expectedClientCall:  "car",
    71  		},
    72  		{
    73  			name: "object with one string prop",
    74  			input: &models.Object{
    75  				Class: "Car",
    76  				Properties: map[string]interface{}{
    77  					"brand": "Mercedes",
    78  				},
    79  			},
    80  			expectedClientCall: "car brand mercedes",
    81  		},
    82  		{
    83  			name: "object with one non-string prop",
    84  			input: &models.Object{
    85  				Class: "Car",
    86  				Properties: map[string]interface{}{
    87  					"power": 300,
    88  				},
    89  			},
    90  			expectedClientCall: "car",
    91  		},
    92  		{
    93  			name: "object with a mix of props",
    94  			input: &models.Object{
    95  				Class: "Car",
    96  				Properties: map[string]interface{}{
    97  					"brand":  "best brand",
    98  					"power":  300,
    99  					"review": "a very great car",
   100  				},
   101  			},
   102  			expectedClientCall: "car brand best brand review a very great car",
   103  		},
   104  		{
   105  			name:    "with a noindexed property",
   106  			noindex: "review",
   107  			input: &models.Object{
   108  				Class: "Car",
   109  				Properties: map[string]interface{}{
   110  					"brand":  "best brand",
   111  					"power":  300,
   112  					"review": "a very great car",
   113  				},
   114  			},
   115  			expectedClientCall: "car brand best brand",
   116  		},
   117  		{
   118  			name:          "with the class name not vectorized",
   119  			excludedClass: "Car",
   120  			input: &models.Object{
   121  				Class: "Car",
   122  				Properties: map[string]interface{}{
   123  					"brand":  "best brand",
   124  					"power":  300,
   125  					"review": "a very great car",
   126  				},
   127  			},
   128  			expectedClientCall: "brand best brand review a very great car",
   129  		},
   130  		{
   131  			name:             "with a property name not vectorized",
   132  			excludedProperty: "review",
   133  			input: &models.Object{
   134  				Class: "Car",
   135  				Properties: map[string]interface{}{
   136  					"brand":  "best brand",
   137  					"power":  300,
   138  					"review": "a very great car",
   139  				},
   140  			},
   141  			expectedClientCall: "car brand best brand a very great car",
   142  		},
   143  		{
   144  			name:             "with no schema labels vectorized",
   145  			excludedProperty: "review",
   146  			excludedClass:    "Car",
   147  			input: &models.Object{
   148  				Class: "Car",
   149  				Properties: map[string]interface{}{
   150  					"review": "a very great car",
   151  				},
   152  			},
   153  			expectedClientCall: "a very great car",
   154  		},
   155  		{
   156  			name:             "with string/text arrays without propname or classname",
   157  			excludedProperty: "reviews",
   158  			excludedClass:    "Car",
   159  			input: &models.Object{
   160  				Class: "Car",
   161  				Properties: map[string]interface{}{
   162  					"reviews": []string{
   163  						"a very great car",
   164  						"you should consider buying one",
   165  					},
   166  				},
   167  			},
   168  			expectedClientCall: "a very great car you should consider buying one",
   169  		},
   170  		{
   171  			name: "with string/text arrays with propname and classname",
   172  			input: &models.Object{
   173  				Class: "Car",
   174  				Properties: map[string]interface{}{
   175  					"reviews": []string{
   176  						"a very great car",
   177  						"you should consider buying one",
   178  					},
   179  				},
   180  			},
   181  			expectedClientCall: "car reviews a very great car reviews you should consider buying one",
   182  		},
   183  		{
   184  			name: "with compound class and prop names",
   185  			input: &models.Object{
   186  				Class: "SuperCar",
   187  				Properties: map[string]interface{}{
   188  					"brandOfTheCar": "best brand",
   189  					"power":         300,
   190  					"review":        "a very great car",
   191  				},
   192  			},
   193  			expectedClientCall: "super car brand of the car best brand review a very great car",
   194  		},
   195  	}
   196  
   197  	for _, test := range tests {
   198  		t.Run(test.name, func(t *testing.T) {
   199  			client := &fakeClient{}
   200  
   201  			v := New(client)
   202  
   203  			ic := &fakeClassConfig{
   204  				excludedProperty:      test.excludedProperty,
   205  				skippedProperty:       test.noindex,
   206  				vectorizeClassName:    test.excludedClass != "Car",
   207  				jinaAIModel:           test.jinaAIModel,
   208  				vectorizePropertyName: true,
   209  			}
   210  			comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties)
   211  			vector, _, err := v.Object(context.Background(), test.input, comp, ic)
   212  
   213  			require.Nil(t, err)
   214  			assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   215  			assert.Equal(t, []string{test.expectedClientCall}, client.lastInput)
   216  			assert.Equal(t, client.lastConfig.Model, test.expectedJinaAIModel)
   217  		})
   218  	}
   219  }
   220  
   221  func TestClassSettings(t *testing.T) {
   222  	type testCase struct {
   223  		expectedBaseURL string
   224  		cfg             moduletools.ClassConfig
   225  	}
   226  	tests := []testCase{
   227  		{
   228  			cfg: fakeClassConfig{
   229  				classConfig: make(map[string]interface{}),
   230  			},
   231  			expectedBaseURL: DefaultBaseURL,
   232  		},
   233  		{
   234  			cfg: fakeClassConfig{
   235  				classConfig: map[string]interface{}{
   236  					"baseURL": "https://proxy.weaviate.dev",
   237  				},
   238  			},
   239  			expectedBaseURL: "https://proxy.weaviate.dev",
   240  		},
   241  	}
   242  
   243  	for _, tt := range tests {
   244  		ic := NewClassSettings(tt.cfg)
   245  		assert.Equal(t, tt.expectedBaseURL, ic.BaseURL())
   246  	}
   247  }
   248  
   249  func TestVectorizingObjectWithDiff(t *testing.T) {
   250  	type testCase struct {
   251  		name              string
   252  		input             *models.Object
   253  		skipped           string
   254  		comp              moduletools.VectorizablePropsComparator
   255  		expectedVectorize bool
   256  	}
   257  
   258  	propsSchema := []*models.Property{
   259  		{
   260  			Name:     "brand",
   261  			DataType: schema.DataTypeText.PropString(),
   262  		},
   263  		{
   264  			Name:     "power",
   265  			DataType: schema.DataTypeInt.PropString(),
   266  		},
   267  		{
   268  			Name:     "description",
   269  			DataType: schema.DataTypeText.PropString(),
   270  		},
   271  		{
   272  			Name:     "reviews",
   273  			DataType: schema.DataTypeTextArray.PropString(),
   274  		},
   275  	}
   276  	props := map[string]interface{}{
   277  		"brand":       "best brand",
   278  		"power":       300,
   279  		"description": "a very great car",
   280  		"reviews": []string{
   281  			"a very great car",
   282  			"you should consider buying one",
   283  		},
   284  	}
   285  	vector := []float32{0, 0, 0, 0}
   286  	var vectors models.Vectors
   287  
   288  	tests := []testCase{
   289  		{
   290  			name: "noop comp",
   291  			input: &models.Object{
   292  				Class:      "Car",
   293  				Properties: props,
   294  			},
   295  			comp:              moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props),
   296  			expectedVectorize: true,
   297  		},
   298  		{
   299  			name: "all props unchanged",
   300  			input: &models.Object{
   301  				Class:      "Car",
   302  				Properties: props,
   303  			},
   304  			comp:              moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors),
   305  			expectedVectorize: false,
   306  		},
   307  		{
   308  			name: "one vectorizable prop changed (1)",
   309  			input: &models.Object{
   310  				Class:      "Car",
   311  				Properties: props,
   312  			},
   313  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   314  				"brand":       "old best brand",
   315  				"power":       300,
   316  				"description": "a very great car",
   317  				"reviews": []string{
   318  					"a very great car",
   319  					"you should consider buying one",
   320  				},
   321  			}, vector, vectors),
   322  			expectedVectorize: true,
   323  		},
   324  		{
   325  			name: "one vectorizable prop changed (2)",
   326  			input: &models.Object{
   327  				Class:      "Car",
   328  				Properties: props,
   329  			},
   330  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   331  				"brand":       "best brand",
   332  				"power":       300,
   333  				"description": "old a very great car",
   334  				"reviews": []string{
   335  					"a very great car",
   336  					"you should consider buying one",
   337  				},
   338  			}, vector, vectors),
   339  			expectedVectorize: true,
   340  		},
   341  		{
   342  			name: "one vectorizable prop changed (3)",
   343  			input: &models.Object{
   344  				Class:      "Car",
   345  				Properties: props,
   346  			},
   347  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   348  				"brand":       "best brand",
   349  				"power":       300,
   350  				"description": "a very great car",
   351  				"reviews": []string{
   352  					"old a very great car",
   353  					"you should consider buying one",
   354  				},
   355  			}, vector, vectors),
   356  			expectedVectorize: true,
   357  		},
   358  		{
   359  			name:    "all non-vectorizable props changed",
   360  			skipped: "description",
   361  			input: &models.Object{
   362  				Class:      "Car",
   363  				Properties: props,
   364  			},
   365  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   366  				"brand":       "best brand",
   367  				"power":       123,
   368  				"description": "old a very great car",
   369  				"reviews": []string{
   370  					"a very great car",
   371  					"you should consider buying one",
   372  				},
   373  			}, vector, vectors),
   374  			expectedVectorize: false,
   375  		},
   376  	}
   377  
   378  	for _, test := range tests {
   379  		t.Run(test.name, func(t *testing.T) {
   380  			ic := &fakeClassConfig{
   381  				skippedProperty: test.skipped,
   382  			}
   383  
   384  			client := &fakeClient{}
   385  			v := New(client)
   386  
   387  			vector, _, err := v.Object(context.Background(), test.input, test.comp, ic)
   388  
   389  			require.Nil(t, err)
   390  			if test.expectedVectorize {
   391  				assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   392  				assert.NotEmpty(t, client.lastInput)
   393  			} else {
   394  				assert.Equal(t, []float32{0, 0, 0, 0}, vector)
   395  				assert.Empty(t, client.lastInput)
   396  			}
   397  		})
   398  	}
   399  }