github.com/weaviate/weaviate@v1.24.6/modules/text2vec-cohere/vectorizer/objects_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"strings"
    17  	"testing"
    18  
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  	"github.com/weaviate/weaviate/entities/models"
    22  	"github.com/weaviate/weaviate/entities/moduletools"
    23  	"github.com/weaviate/weaviate/entities/schema"
    24  )
    25  
    26  // These are mostly copy/pasted (with minimal additions) from the
    27  // text2vec-contextionary module
    28  func TestVectorizingObjects(t *testing.T) {
    29  	type testCase struct {
    30  		name                string
    31  		input               *models.Object
    32  		expectedClientCall  string
    33  		expectedCohereModel string
    34  		noindex             string
    35  		excludedProperty    string // to simulate a schema where property names aren't vectorized
    36  		excludedClass       string // to simulate a schema where class names aren't vectorized
    37  		cohereModel         string
    38  	}
    39  
    40  	propsSchema := []*models.Property{
    41  		{
    42  			Name:     "brand",
    43  			DataType: schema.DataTypeText.PropString(),
    44  		},
    45  		{
    46  			Name:     "power",
    47  			DataType: schema.DataTypeInt.PropString(),
    48  		},
    49  		{
    50  			Name:     "review",
    51  			DataType: schema.DataTypeText.PropString(),
    52  		},
    53  		{
    54  			Name:     "brandOfTheCar",
    55  			DataType: schema.DataTypeText.PropString(),
    56  		},
    57  		{
    58  			Name:     "reviews",
    59  			DataType: schema.DataTypeTextArray.PropString(),
    60  		},
    61  	}
    62  
    63  	tests := []testCase{
    64  		{
    65  			name: "empty object",
    66  			input: &models.Object{
    67  				Class: "Car",
    68  			},
    69  			cohereModel:         "large",
    70  			expectedCohereModel: "large",
    71  			expectedClientCall:  "car",
    72  		},
    73  		{
    74  			name: "object with one string prop",
    75  			input: &models.Object{
    76  				Class: "Car",
    77  				Properties: map[string]interface{}{
    78  					"brand": "Mercedes",
    79  				},
    80  			},
    81  			expectedClientCall: "car brand mercedes",
    82  		},
    83  		{
    84  			name: "object with one non-string prop",
    85  			input: &models.Object{
    86  				Class: "Car",
    87  				Properties: map[string]interface{}{
    88  					"power": 300,
    89  				},
    90  			},
    91  			expectedClientCall: "car",
    92  		},
    93  		{
    94  			name: "object with a mix of props",
    95  			input: &models.Object{
    96  				Class: "Car",
    97  				Properties: map[string]interface{}{
    98  					"brand":  "best brand",
    99  					"power":  300,
   100  					"review": "a very great car",
   101  				},
   102  			},
   103  			expectedClientCall: "car brand best brand review a very great car",
   104  		},
   105  		{
   106  			name:    "with a noindexed property",
   107  			noindex: "review",
   108  			input: &models.Object{
   109  				Class: "Car",
   110  				Properties: map[string]interface{}{
   111  					"brand":  "best brand",
   112  					"power":  300,
   113  					"review": "a very great car",
   114  				},
   115  			},
   116  			expectedClientCall: "car brand best brand",
   117  		},
   118  		{
   119  			name:          "with the class name not vectorized",
   120  			excludedClass: "Car",
   121  			input: &models.Object{
   122  				Class: "Car",
   123  				Properties: map[string]interface{}{
   124  					"brand":  "best brand",
   125  					"power":  300,
   126  					"review": "a very great car",
   127  				},
   128  			},
   129  			expectedClientCall: "brand best brand review a very great car",
   130  		},
   131  		{
   132  			name:             "with a property name not vectorized",
   133  			excludedProperty: "review",
   134  			input: &models.Object{
   135  				Class: "Car",
   136  				Properties: map[string]interface{}{
   137  					"brand":  "best brand",
   138  					"power":  300,
   139  					"review": "a very great car",
   140  				},
   141  			},
   142  			expectedClientCall: "car brand best brand a very great car",
   143  		},
   144  		{
   145  			name:             "with no schema labels vectorized",
   146  			excludedProperty: "review",
   147  			excludedClass:    "Car",
   148  			input: &models.Object{
   149  				Class: "Car",
   150  				Properties: map[string]interface{}{
   151  					"review": "a very great car",
   152  				},
   153  			},
   154  			expectedClientCall: "a very great car",
   155  		},
   156  		{
   157  			name:             "with string/text arrays without propname or classname",
   158  			excludedProperty: "reviews",
   159  			excludedClass:    "Car",
   160  			input: &models.Object{
   161  				Class: "Car",
   162  				Properties: map[string]interface{}{
   163  					"reviews": []string{
   164  						"a very great car",
   165  						"you should consider buying one",
   166  					},
   167  				},
   168  			},
   169  			expectedClientCall: "a very great car you should consider buying one",
   170  		},
   171  		{
   172  			name: "with string/text arrays with propname and classname",
   173  			input: &models.Object{
   174  				Class: "Car",
   175  				Properties: map[string]interface{}{
   176  					"reviews": []string{
   177  						"a very great car",
   178  						"you should consider buying one",
   179  					},
   180  				},
   181  			},
   182  			expectedClientCall: "car reviews a very great car reviews you should consider buying one",
   183  		},
   184  		{
   185  			name: "with compound class and prop names",
   186  			input: &models.Object{
   187  				Class: "SuperCar",
   188  				Properties: map[string]interface{}{
   189  					"brandOfTheCar": "best brand",
   190  					"power":         300,
   191  					"review":        "a very great car",
   192  				},
   193  			},
   194  			expectedClientCall: "super car brand of the car best brand review a very great car",
   195  		},
   196  	}
   197  
   198  	for _, test := range tests {
   199  		t.Run(test.name, func(t *testing.T) {
   200  			client := &fakeClient{}
   201  
   202  			v := New(client)
   203  
   204  			ic := &fakeClassConfig{
   205  				excludedProperty:      test.excludedProperty,
   206  				skippedProperty:       test.noindex,
   207  				vectorizeClassName:    test.excludedClass != "Car",
   208  				cohereModel:           test.cohereModel,
   209  				vectorizePropertyName: true,
   210  			}
   211  			comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties)
   212  			vector, _, err := v.Object(context.Background(), test.input, comp, ic)
   213  
   214  			require.Nil(t, err)
   215  			assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   216  			expected := strings.Split(test.expectedClientCall, " ")
   217  			actual := strings.Split(client.lastInput[0], " ")
   218  			assert.Equal(t, expected, actual)
   219  			assert.Equal(t, test.expectedCohereModel, client.lastConfig.Model)
   220  		})
   221  	}
   222  }
   223  
   224  func TestVectorizingObjectsWithDiff(t *testing.T) {
   225  	type testCase struct {
   226  		name              string
   227  		input             *models.Object
   228  		skipped           string
   229  		comp              moduletools.VectorizablePropsComparator
   230  		expectedVectorize bool
   231  	}
   232  
   233  	propsSchema := []*models.Property{
   234  		{
   235  			Name:     "brand",
   236  			DataType: schema.DataTypeText.PropString(),
   237  		},
   238  		{
   239  			Name:     "power",
   240  			DataType: schema.DataTypeInt.PropString(),
   241  		},
   242  		{
   243  			Name:     "description",
   244  			DataType: schema.DataTypeText.PropString(),
   245  		},
   246  		{
   247  			Name:     "reviews",
   248  			DataType: schema.DataTypeTextArray.PropString(),
   249  		},
   250  	}
   251  	props := map[string]interface{}{
   252  		"brand":       "best brand",
   253  		"power":       300,
   254  		"description": "a very great car",
   255  		"reviews": []string{
   256  			"a very great car",
   257  			"you should consider buying one",
   258  		},
   259  	}
   260  	vector := []float32{0, 0, 0, 0}
   261  	var vectors models.Vectors
   262  
   263  	tests := []testCase{
   264  		{
   265  			name: "noop comp",
   266  			input: &models.Object{
   267  				Class:      "Car",
   268  				Properties: props,
   269  			},
   270  			comp:              moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props),
   271  			expectedVectorize: true,
   272  		},
   273  		{
   274  			name: "all props unchanged",
   275  			input: &models.Object{
   276  				Class:      "Car",
   277  				Properties: props,
   278  			},
   279  			comp:              moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors),
   280  			expectedVectorize: false,
   281  		},
   282  		{
   283  			name: "diff one vectorizable prop changed (1)",
   284  			input: &models.Object{
   285  				Class:      "Car",
   286  				Properties: props,
   287  			},
   288  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   289  				"brand":       "old best brand",
   290  				"power":       300,
   291  				"description": "a very great car",
   292  				"reviews": []string{
   293  					"a very great car",
   294  					"you should consider buying one",
   295  				},
   296  			}, vector, vectors),
   297  			expectedVectorize: true,
   298  		},
   299  		{
   300  			name: "one vectorizable prop changed (2)",
   301  			input: &models.Object{
   302  				Class:      "Car",
   303  				Properties: props,
   304  			},
   305  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   306  				"brand":       "best brand",
   307  				"power":       300,
   308  				"description": "old a very great car",
   309  				"reviews": []string{
   310  					"a very great car",
   311  					"you should consider buying one",
   312  				},
   313  			}, vector, vectors),
   314  			expectedVectorize: true,
   315  		},
   316  		{
   317  			name: "one vectorizable prop changed (3)",
   318  			input: &models.Object{
   319  				Class:      "Car",
   320  				Properties: props,
   321  			},
   322  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   323  				"brand":       "best brand",
   324  				"power":       300,
   325  				"description": "a very great car",
   326  				"reviews": []string{
   327  					"old a very great car",
   328  					"you should consider buying one",
   329  				},
   330  			}, vector, vectors),
   331  			expectedVectorize: true,
   332  		},
   333  		{
   334  			name:    "all non-vectorizable props changed",
   335  			skipped: "description",
   336  			input: &models.Object{
   337  				Class:      "Car",
   338  				Properties: props,
   339  			},
   340  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   341  				"brand":       "best brand",
   342  				"power":       123,
   343  				"description": "old a very great car",
   344  				"reviews": []string{
   345  					"a very great car",
   346  					"you should consider buying one",
   347  				},
   348  			}, vector, vectors),
   349  			expectedVectorize: false,
   350  		},
   351  	}
   352  
   353  	for _, test := range tests {
   354  		t.Run(test.name, func(t *testing.T) {
   355  			ic := &fakeClassConfig{
   356  				skippedProperty: test.skipped,
   357  			}
   358  
   359  			client := &fakeClient{}
   360  			v := New(client)
   361  
   362  			vector, _, err := v.Object(context.Background(), test.input, test.comp, ic)
   363  
   364  			require.Nil(t, err)
   365  			if test.expectedVectorize {
   366  				assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   367  				assert.NotEmpty(t, client.lastInput)
   368  			} else {
   369  				assert.Equal(t, []float32{0, 0, 0, 0}, vector)
   370  				assert.Empty(t, client.lastInput)
   371  			}
   372  		})
   373  	}
   374  }