github.com/weaviate/weaviate@v1.24.6/modules/text2vec-palm/vectorizer/objects_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"strings"
    17  	"testing"
    18  
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  	"github.com/weaviate/weaviate/entities/models"
    22  	"github.com/weaviate/weaviate/entities/moduletools"
    23  	"github.com/weaviate/weaviate/entities/schema"
    24  )
    25  
    26  // These are mostly copy/pasted (with minimal additions) from the
    27  // text2vec-contextionary module
    28  func TestVectorizingObjects(t *testing.T) {
    29  	type testCase struct {
    30  		name               string
    31  		input              *models.Object
    32  		expectedClientCall string
    33  		noindex            string
    34  		excludedProperty   string // to simulate a schema where property names aren't vectorized
    35  		excludedClass      string // to simulate a schema where class names aren't vectorized
    36  		palmModel          string
    37  	}
    38  
    39  	propsSchema := []*models.Property{
    40  		{
    41  			Name:     "brand",
    42  			DataType: schema.DataTypeText.PropString(),
    43  		},
    44  		{
    45  			Name:     "power",
    46  			DataType: schema.DataTypeInt.PropString(),
    47  		},
    48  		{
    49  			Name:     "review",
    50  			DataType: schema.DataTypeText.PropString(),
    51  		},
    52  		{
    53  			Name:     "brandOfTheCar",
    54  			DataType: schema.DataTypeText.PropString(),
    55  		},
    56  		{
    57  			Name:     "reviews",
    58  			DataType: schema.DataTypeTextArray.PropString(),
    59  		},
    60  	}
    61  
    62  	tests := []testCase{
    63  		{
    64  			name: "empty object",
    65  			input: &models.Object{
    66  				Class: "Car",
    67  			},
    68  			palmModel:          "large",
    69  			expectedClientCall: "car",
    70  		},
    71  		{
    72  			name: "object with one string prop",
    73  			input: &models.Object{
    74  				Class: "Car",
    75  				Properties: map[string]interface{}{
    76  					"brand": "Mercedes",
    77  				},
    78  			},
    79  			expectedClientCall: "car brand mercedes",
    80  		},
    81  		{
    82  			name: "object with one non-string prop",
    83  			input: &models.Object{
    84  				Class: "Car",
    85  				Properties: map[string]interface{}{
    86  					"power": 300,
    87  				},
    88  			},
    89  			expectedClientCall: "car",
    90  		},
    91  		{
    92  			name: "object with a mix of props",
    93  			input: &models.Object{
    94  				Class: "Car",
    95  				Properties: map[string]interface{}{
    96  					"brand":  "best brand",
    97  					"power":  300,
    98  					"review": "a very great car",
    99  				},
   100  			},
   101  			expectedClientCall: "car brand best brand review a very great car",
   102  		},
   103  		{
   104  			name:    "with a noindexed property",
   105  			noindex: "review",
   106  			input: &models.Object{
   107  				Class: "Car",
   108  				Properties: map[string]interface{}{
   109  					"brand":  "best brand",
   110  					"power":  300,
   111  					"review": "a very great car",
   112  				},
   113  			},
   114  			expectedClientCall: "car brand best brand",
   115  		},
   116  		{
   117  			name:          "with the class name not vectorized",
   118  			excludedClass: "Car",
   119  			input: &models.Object{
   120  				Class: "Car",
   121  				Properties: map[string]interface{}{
   122  					"brand":  "best brand",
   123  					"power":  300,
   124  					"review": "a very great car",
   125  				},
   126  			},
   127  			expectedClientCall: "brand best brand review a very great car",
   128  		},
   129  		{
   130  			name:             "with a property name not vectorized",
   131  			excludedProperty: "review",
   132  			input: &models.Object{
   133  				Class: "Car",
   134  				Properties: map[string]interface{}{
   135  					"brand":  "best brand",
   136  					"power":  300,
   137  					"review": "a very great car",
   138  				},
   139  			},
   140  			expectedClientCall: "car brand best brand a very great car",
   141  		},
   142  		{
   143  			name:             "with no schema labels vectorized",
   144  			excludedProperty: "review",
   145  			excludedClass:    "Car",
   146  			input: &models.Object{
   147  				Class: "Car",
   148  				Properties: map[string]interface{}{
   149  					"review": "a very great car",
   150  				},
   151  			},
   152  			expectedClientCall: "a very great car",
   153  		},
   154  		{
   155  			name:             "with string/text arrays without propname or classname",
   156  			excludedProperty: "reviews",
   157  			excludedClass:    "Car",
   158  			input: &models.Object{
   159  				Class: "Car",
   160  				Properties: map[string]interface{}{
   161  					"reviews": []string{
   162  						"a very great car",
   163  						"you should consider buying one",
   164  					},
   165  				},
   166  			},
   167  			expectedClientCall: "a very great car you should consider buying one",
   168  		},
   169  		{
   170  			name: "with string/text arrays with propname and classname",
   171  			input: &models.Object{
   172  				Class: "Car",
   173  				Properties: map[string]interface{}{
   174  					"reviews": []string{
   175  						"a very great car",
   176  						"you should consider buying one",
   177  					},
   178  				},
   179  			},
   180  			expectedClientCall: "car reviews a very great car reviews you should consider buying one",
   181  		},
   182  		{
   183  			name: "with compound class and prop names",
   184  			input: &models.Object{
   185  				Class: "SuperCar",
   186  				Properties: map[string]interface{}{
   187  					"brandOfTheCar": "best brand",
   188  					"power":         300,
   189  					"review":        "a very great car",
   190  				},
   191  			},
   192  			expectedClientCall: "super car brand of the car best brand review a very great car",
   193  		},
   194  	}
   195  
   196  	for _, test := range tests {
   197  		t.Run(test.name, func(t *testing.T) {
   198  			client := &fakeClient{}
   199  
   200  			v := New(client)
   201  
   202  			ic := &fakeClassConfig{
   203  				skippedProperty:       test.noindex,
   204  				vectorizeClassName:    test.excludedClass != "Car",
   205  				excludedProperty:      test.excludedProperty,
   206  				vectorizePropertyName: true,
   207  				apiEndpoint:           "",
   208  				projectID:             "",
   209  				endpointID:            "",
   210  				modelID:               "",
   211  			}
   212  			comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties)
   213  			vector, _, err := v.Object(context.Background(), test.input, comp, ic)
   214  
   215  			require.Nil(t, err)
   216  			assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   217  			expected := strings.Split(test.expectedClientCall, " ")
   218  			actual := strings.Split(client.lastInput[0], " ")
   219  			assert.Equal(t, expected, actual)
   220  		})
   221  	}
   222  }
   223  
   224  func TestVectorizingObjectsWithDiff(t *testing.T) {
   225  	type testCase struct {
   226  		name              string
   227  		input             *models.Object
   228  		skipped           string
   229  		comp              moduletools.VectorizablePropsComparator
   230  		expectedVectorize bool
   231  	}
   232  
   233  	propsSchema := []*models.Property{
   234  		{
   235  			Name:     "brand",
   236  			DataType: schema.DataTypeText.PropString(),
   237  		},
   238  		{
   239  			Name:     "power",
   240  			DataType: schema.DataTypeInt.PropString(),
   241  		},
   242  		{
   243  			Name:     "description",
   244  			DataType: schema.DataTypeText.PropString(),
   245  		},
   246  		{
   247  			Name:     "reviews",
   248  			DataType: schema.DataTypeTextArray.PropString(),
   249  		},
   250  	}
   251  	props := map[string]interface{}{
   252  		"brand":       "best brand",
   253  		"power":       300,
   254  		"description": "a very great car",
   255  		"reviews": []string{
   256  			"a very great car",
   257  			"you should consider buying one",
   258  		},
   259  	}
   260  	vector := []float32{0, 0, 0, 0}
   261  	var vectors models.Vectors
   262  
   263  	tests := []testCase{
   264  		{
   265  			name: "noop comp",
   266  			input: &models.Object{
   267  				Class:      "Car",
   268  				Properties: props,
   269  			},
   270  			comp:              moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props),
   271  			expectedVectorize: true,
   272  		},
   273  		{
   274  			name: "all props unchanged",
   275  			input: &models.Object{
   276  				Class:      "Car",
   277  				Properties: props,
   278  			},
   279  			comp:              moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors),
   280  			expectedVectorize: false,
   281  		},
   282  		{
   283  			name: "one vectorizable prop changed (1)",
   284  			input: &models.Object{
   285  				Class:      "Car",
   286  				Properties: props,
   287  			},
   288  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   289  				"brand":       "old best brand",
   290  				"power":       300,
   291  				"description": "a very great car",
   292  				"reviews": []string{
   293  					"a very great car",
   294  					"you should consider buying one",
   295  				},
   296  			}, vector, vectors),
   297  			expectedVectorize: true,
   298  		},
   299  		{
   300  			name: "diff one vectorizable prop changed (2)",
   301  			input: &models.Object{
   302  				Class:      "Car",
   303  				Properties: props,
   304  			},
   305  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   306  				"brand":       "best brand",
   307  				"power":       300,
   308  				"description": "old a very great car",
   309  				"reviews": []string{
   310  					"a very great car",
   311  					"you should consider buying one",
   312  				},
   313  			}, vector, vectors),
   314  			expectedVectorize: true,
   315  		},
   316  		{
   317  			name: "one vectorizable prop changed (3)",
   318  			input: &models.Object{
   319  				Class:      "Car",
   320  				Properties: props,
   321  			},
   322  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   323  				"brand":       "best brand",
   324  				"power":       300,
   325  				"description": "a very great car",
   326  				"reviews": []string{
   327  					"old a very great car",
   328  					"you should consider buying one",
   329  				},
   330  			}, vector, vectors),
   331  			expectedVectorize: true,
   332  		},
   333  		{
   334  			name:    "all non-vectorizable props changed",
   335  			skipped: "description",
   336  			input: &models.Object{
   337  				Class:      "Car",
   338  				Properties: props,
   339  			},
   340  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   341  				"brand":       "best brand",
   342  				"power":       123,
   343  				"description": "old a very great car",
   344  				"reviews": []string{
   345  					"a very great car",
   346  					"you should consider buying one",
   347  				},
   348  			}, vector, vectors),
   349  			expectedVectorize: false,
   350  		},
   351  	}
   352  
   353  	for _, test := range tests {
   354  		t.Run(test.name, func(t *testing.T) {
   355  			ic := &fakeClassConfig{
   356  				skippedProperty: test.skipped,
   357  			}
   358  
   359  			client := &fakeClient{}
   360  			v := New(client)
   361  
   362  			vector, _, err := v.Object(context.Background(), test.input, test.comp, ic)
   363  
   364  			require.Nil(t, err)
   365  			if test.expectedVectorize {
   366  				assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   367  				assert.NotEmpty(t, client.lastInput)
   368  			} else {
   369  				assert.Equal(t, []float32{0, 0, 0, 0}, vector)
   370  				assert.Empty(t, client.lastInput)
   371  			}
   372  		})
   373  	}
   374  }