github.com/weaviate/weaviate@v1.24.6/modules/text2vec-contextionary/vectorizer/vectorizer_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"strings"
    17  	"testing"
    18  
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  	"github.com/weaviate/weaviate/entities/models"
    22  	"github.com/weaviate/weaviate/entities/moduletools"
    23  	"github.com/weaviate/weaviate/entities/schema"
    24  )
    25  
    26  func TestVectorizingObjects(t *testing.T) {
    27  	type testCase struct {
    28  		name               string
    29  		input              *models.Object
    30  		expectedClientCall []string
    31  		noindex            string
    32  		excludedProperty   string // to simulate a schema where property names aren't vectorized
    33  		excludedClass      string // to simulate a schema where class names aren't vectorized
    34  	}
    35  
    36  	propsSchema := []*models.Property{
    37  		{
    38  			Name:     "brand",
    39  			DataType: schema.DataTypeText.PropString(),
    40  		},
    41  		{
    42  			Name:     "power",
    43  			DataType: schema.DataTypeInt.PropString(),
    44  		},
    45  		{
    46  			Name:     "review",
    47  			DataType: schema.DataTypeText.PropString(),
    48  		},
    49  		{
    50  			Name:     "brandOfTheCar",
    51  			DataType: schema.DataTypeText.PropString(),
    52  		},
    53  		{
    54  			Name:     "reviews",
    55  			DataType: schema.DataTypeTextArray.PropString(),
    56  		},
    57  	}
    58  
    59  	tests := []testCase{
    60  		{
    61  			name: "empty object",
    62  			input: &models.Object{
    63  				Class: "Car",
    64  			},
    65  			expectedClientCall: []string{"car"},
    66  		},
    67  		{
    68  			name: "object with one string prop",
    69  			input: &models.Object{
    70  				Class: "Car",
    71  				Properties: map[string]interface{}{
    72  					"brand": "Mercedes",
    73  				},
    74  			},
    75  			expectedClientCall: []string{"car brand mercedes"},
    76  		},
    77  
    78  		{
    79  			name: "object with one non-string prop",
    80  			input: &models.Object{
    81  				Class: "Car",
    82  				Properties: map[string]interface{}{
    83  					"power": 300,
    84  				},
    85  			},
    86  			expectedClientCall: []string{"car"},
    87  		},
    88  
    89  		{
    90  			name: "object with a mix of props",
    91  			input: &models.Object{
    92  				Class: "Car",
    93  				Properties: map[string]interface{}{
    94  					"brand":  "best brand",
    95  					"power":  300,
    96  					"review": "a very great car",
    97  				},
    98  			},
    99  			expectedClientCall: []string{"car brand best brand review a very great car"},
   100  		},
   101  		{
   102  			name:    "with a noindexed property",
   103  			noindex: "review",
   104  			input: &models.Object{
   105  				Class: "Car",
   106  				Properties: map[string]interface{}{
   107  					"brand":  "best brand",
   108  					"power":  300,
   109  					"review": "a very great car",
   110  				},
   111  			},
   112  			expectedClientCall: []string{"car brand best brand"},
   113  		},
   114  
   115  		{
   116  			name:          "with the class name not vectorized",
   117  			excludedClass: "Car",
   118  			input: &models.Object{
   119  				Class: "Car",
   120  				Properties: map[string]interface{}{
   121  					"brand":  "best brand",
   122  					"power":  300,
   123  					"review": "a very great car",
   124  				},
   125  			},
   126  			expectedClientCall: []string{"brand best brand review a very great car"},
   127  		},
   128  
   129  		{
   130  			name:             "with a property name not vectorized",
   131  			excludedProperty: "review",
   132  			input: &models.Object{
   133  				Class: "Car",
   134  				Properties: map[string]interface{}{
   135  					"brand":  "best brand",
   136  					"power":  300,
   137  					"review": "a very great car",
   138  				},
   139  			},
   140  			expectedClientCall: []string{"car brand best brand a very great car"},
   141  		},
   142  
   143  		{
   144  			name:             "with no schema labels vectorized",
   145  			excludedProperty: "review",
   146  			excludedClass:    "Car",
   147  			input: &models.Object{
   148  				Class: "Car",
   149  				Properties: map[string]interface{}{
   150  					"review": "a very great car",
   151  				},
   152  			},
   153  			expectedClientCall: []string{"a very great car"},
   154  		},
   155  
   156  		{
   157  			name:             "with string/text arrays without propname or classname",
   158  			excludedProperty: "reviews",
   159  			excludedClass:    "Car",
   160  			input: &models.Object{
   161  				Class: "Car",
   162  				Properties: map[string]interface{}{
   163  					"reviews": []string{
   164  						"a very great car",
   165  						"you should consider buying one",
   166  					},
   167  				},
   168  			},
   169  			expectedClientCall: []string{"a very great car you should consider buying one"},
   170  		},
   171  
   172  		{
   173  			name: "with string/text arrays with propname and classname",
   174  			input: &models.Object{
   175  				Class: "Car",
   176  				Properties: map[string]interface{}{
   177  					"reviews": []string{
   178  						"a very great car",
   179  						"you should consider buying one",
   180  					},
   181  				},
   182  			},
   183  			expectedClientCall: []string{"car reviews a very great car reviews you should consider buying one"},
   184  		},
   185  
   186  		{
   187  			name: "with compound class and prop names",
   188  			input: &models.Object{
   189  				Class: "SuperCar",
   190  				Properties: map[string]interface{}{
   191  					"brandOfTheCar": "best brand",
   192  					"power":         300,
   193  					"review":        "a very great car",
   194  				},
   195  			},
   196  			expectedClientCall: []string{"super car brand of the car best brand review a very great car"},
   197  		},
   198  	}
   199  
   200  	for _, test := range tests {
   201  		t.Run(test.name, func(t *testing.T) {
   202  			ic := &fakeClassConfig{
   203  				excludedProperty:      test.excludedProperty,
   204  				skippedProperty:       test.noindex,
   205  				vectorizeClassName:    test.excludedClass != "Car",
   206  				vectorizePropertyName: true,
   207  			}
   208  
   209  			client := &fakeClient{}
   210  			v := New(client)
   211  
   212  			comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties)
   213  			vector, _, err := v.Object(context.Background(), test.input, comp, ic)
   214  
   215  			require.Nil(t, err)
   216  			assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   217  			expected := strings.Split(test.expectedClientCall[0], " ")
   218  			actual := strings.Split(client.lastInput[0], " ")
   219  			assert.ElementsMatch(t, expected, actual)
   220  		})
   221  	}
   222  }
   223  
   224  func TestVectorizingObjectsWithDiff(t *testing.T) {
   225  	type testCase struct {
   226  		name              string
   227  		input             *models.Object
   228  		skipped           string
   229  		comp              moduletools.VectorizablePropsComparator
   230  		expectedVectorize bool
   231  	}
   232  
   233  	propsSchema := []*models.Property{
   234  		{
   235  			Name:     "brand",
   236  			DataType: schema.DataTypeText.PropString(),
   237  		},
   238  		{
   239  			Name:     "power",
   240  			DataType: schema.DataTypeInt.PropString(),
   241  		},
   242  		{
   243  			Name:     "description",
   244  			DataType: schema.DataTypeText.PropString(),
   245  		},
   246  		{
   247  			Name:     "reviews",
   248  			DataType: schema.DataTypeTextArray.PropString(),
   249  		},
   250  	}
   251  	props := map[string]interface{}{
   252  		"brand":       "best brand",
   253  		"power":       300,
   254  		"description": "a very great car",
   255  		"reviews": []string{
   256  			"a very great car",
   257  			"you should consider buying one",
   258  		},
   259  	}
   260  	vector := []float32{0, 0, 0, 0}
   261  	var vectors models.Vectors
   262  
   263  	tests := []testCase{
   264  		{
   265  			name: "noop comp",
   266  			input: &models.Object{
   267  				Class:      "Car",
   268  				Properties: props,
   269  			},
   270  			comp:              moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props),
   271  			expectedVectorize: true,
   272  		},
   273  		{
   274  			name: "all props unchanged",
   275  			input: &models.Object{
   276  				Class:      "Car",
   277  				Properties: props,
   278  			},
   279  			comp:              moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors),
   280  			expectedVectorize: false,
   281  		},
   282  		{
   283  			name: "one vectorizable prop changed (1)",
   284  			input: &models.Object{
   285  				Class:      "Car",
   286  				Properties: props,
   287  			},
   288  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   289  				"brand":       "old best brand",
   290  				"power":       300,
   291  				"description": "a very great car",
   292  				"reviews": []string{
   293  					"a very great car",
   294  					"you should consider buying one",
   295  				},
   296  			}, vector, vectors),
   297  			expectedVectorize: true,
   298  		},
   299  		{
   300  			name: "one vectorizable prop changed (2)",
   301  			input: &models.Object{
   302  				Class:      "Car",
   303  				Properties: props,
   304  			},
   305  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   306  				"brand":       "best brand",
   307  				"power":       300,
   308  				"description": "old a very great car",
   309  				"reviews": []string{
   310  					"a very great car",
   311  					"you should consider buying one",
   312  				},
   313  			}, vector, vectors),
   314  			expectedVectorize: true,
   315  		},
   316  		{
   317  			name: "one vectorizable prop changed (3)",
   318  			input: &models.Object{
   319  				Class:      "Car",
   320  				Properties: props,
   321  			},
   322  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   323  				"brand":       "best brand",
   324  				"power":       300,
   325  				"description": "a very great car",
   326  				"reviews": []string{
   327  					"old a very great car",
   328  					"you should consider buying one",
   329  				},
   330  			}, vector, vectors),
   331  			expectedVectorize: true,
   332  		},
   333  		{
   334  			name:    "all non-vectorizable props changed",
   335  			skipped: "description",
   336  			input: &models.Object{
   337  				Class:      "Car",
   338  				Properties: props,
   339  			},
   340  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   341  				"brand":       "best brand",
   342  				"power":       123,
   343  				"description": "old a very great car",
   344  				"reviews": []string{
   345  					"a very great car",
   346  					"you should consider buying one",
   347  				},
   348  			}, vector, vectors),
   349  			expectedVectorize: false,
   350  		},
   351  	}
   352  
   353  	for _, test := range tests {
   354  		t.Run(test.name, func(t *testing.T) {
   355  			ic := &fakeClassConfig{
   356  				skippedProperty: test.skipped,
   357  			}
   358  
   359  			client := &fakeClient{}
   360  			v := New(client)
   361  
   362  			vector, _, err := v.Object(context.Background(), test.input, test.comp, ic)
   363  
   364  			require.Nil(t, err)
   365  			if test.expectedVectorize {
   366  				assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   367  				assert.NotNil(t, client.lastInput)
   368  			} else {
   369  				assert.Equal(t, []float32{0, 0, 0, 0}, vector)
   370  				assert.Nil(t, client.lastInput)
   371  			}
   372  		})
   373  	}
   374  }
   375  
   376  func TestVectorizingActions(t *testing.T) {
   377  	type testCase struct {
   378  		name               string
   379  		input              *models.Object
   380  		expectedClientCall []string
   381  		noindex            string
   382  		excludedProperty   string // to simulate a schema where property names aren't vectorized
   383  		excludedClass      string // to simulate a schema where class names aren't vectorized
   384  	}
   385  
   386  	propsSchema := []*models.Property{
   387  		{
   388  			Name:     "brand",
   389  			DataType: schema.DataTypeText.PropString(),
   390  		},
   391  		{
   392  			Name:     "length",
   393  			DataType: schema.DataTypeInt.PropString(),
   394  		},
   395  		{
   396  			Name:     "review",
   397  			DataType: schema.DataTypeText.PropString(),
   398  		},
   399  	}
   400  
   401  	tests := []testCase{
   402  		{
   403  			name: "empty object",
   404  			input: &models.Object{
   405  				Class: "Flight",
   406  			},
   407  			expectedClientCall: []string{"flight"},
   408  		},
   409  		{
   410  			name: "object with one string prop",
   411  			input: &models.Object{
   412  				Class: "Flight",
   413  				Properties: map[string]interface{}{
   414  					"brand": "Mercedes",
   415  				},
   416  			},
   417  			expectedClientCall: []string{"flight brand mercedes"},
   418  		},
   419  
   420  		{
   421  			name: "object with one non-string prop",
   422  			input: &models.Object{
   423  				Class: "Flight",
   424  				Properties: map[string]interface{}{
   425  					"length": 300,
   426  				},
   427  			},
   428  			expectedClientCall: []string{"flight"},
   429  		},
   430  
   431  		{
   432  			name: "object with a mix of props",
   433  			input: &models.Object{
   434  				Class: "Flight",
   435  				Properties: map[string]interface{}{
   436  					"brand":  "best brand",
   437  					"length": 300,
   438  					"review": "a very great flight",
   439  				},
   440  			},
   441  			expectedClientCall: []string{"flight brand best brand review a very great flight"},
   442  		},
   443  	}
   444  
   445  	for _, test := range tests {
   446  		t.Run(test.name, func(t *testing.T) {
   447  			client := &fakeClient{}
   448  			v := New(client)
   449  
   450  			ic := &fakeClassConfig{
   451  				excludedProperty:      test.excludedProperty,
   452  				skippedProperty:       test.noindex,
   453  				vectorizeClassName:    test.excludedClass != "Flight",
   454  				vectorizePropertyName: true,
   455  			}
   456  			comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties)
   457  			vector, _, err := v.Object(context.Background(), test.input, comp, ic)
   458  
   459  			require.Nil(t, err)
   460  			assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   461  			expected := strings.Split(test.expectedClientCall[0], " ")
   462  			actual := strings.Split(client.lastInput[0], " ")
   463  			assert.ElementsMatch(t, expected, actual)
   464  		})
   465  	}
   466  }
   467  
   468  func TestVectorizingSearchTerms(t *testing.T) {
   469  	type testCase struct {
   470  		name               string
   471  		input              []string
   472  		expectedClientCall []string
   473  	}
   474  
   475  	tests := []testCase{
   476  		{
   477  			name:               "single word",
   478  			input:              []string{"car"},
   479  			expectedClientCall: []string{"car"},
   480  		},
   481  		{
   482  			name:               "multiple entries with multiple words",
   483  			input:              []string{"car", "car brand"},
   484  			expectedClientCall: []string{"car", "car brand"},
   485  		},
   486  		{
   487  			name:               "multiple entries with upper casing",
   488  			input:              []string{"Car", "Car Brand"},
   489  			expectedClientCall: []string{"car", "car brand"},
   490  		},
   491  		{
   492  			name:               "with camel cased words",
   493  			input:              []string{"Car", "CarBrand"},
   494  			expectedClientCall: []string{"car", "car brand"},
   495  		},
   496  	}
   497  
   498  	for _, test := range tests {
   499  		t.Run(test.name, func(t *testing.T) {
   500  			client := &fakeClient{}
   501  			v := New(client)
   502  
   503  			res, err := v.Corpi(context.Background(), test.input)
   504  
   505  			require.Nil(t, err)
   506  			assert.Equal(t, []float32{0, 1, 2, 3}, res)
   507  			assert.ElementsMatch(t, test.expectedClientCall, client.lastInput)
   508  		})
   509  	}
   510  }