github.com/weaviate/weaviate@v1.24.6/modules/text2vec-gpt4all/vectorizer/objects_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"strings"
    17  	"testing"
    18  
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  	"github.com/weaviate/weaviate/entities/models"
    22  	"github.com/weaviate/weaviate/entities/moduletools"
    23  	"github.com/weaviate/weaviate/entities/schema"
    24  )
    25  
    26  func TestVectorizingObjects(t *testing.T) {
    27  	type testCase struct {
    28  		name               string
    29  		input              *models.Object
    30  		expectedClientCall string
    31  		noindex            string
    32  		excludedProperty   string // to simulate a schema where property names aren't vectorized
    33  		excludedClass      string // to simulate a schema where class names aren't vectorized
    34  	}
    35  
    36  	propsSchema := []*models.Property{
    37  		{
    38  			Name:     "brand",
    39  			DataType: schema.DataTypeText.PropString(),
    40  		},
    41  		{
    42  			Name:     "power",
    43  			DataType: schema.DataTypeInt.PropString(),
    44  		},
    45  		{
    46  			Name:     "review",
    47  			DataType: schema.DataTypeText.PropString(),
    48  		},
    49  		{
    50  			Name:     "brandOfTheCar",
    51  			DataType: schema.DataTypeText.PropString(),
    52  		},
    53  		{
    54  			Name:     "reviews",
    55  			DataType: schema.DataTypeTextArray.PropString(),
    56  		},
    57  	}
    58  
    59  	tests := []testCase{
    60  		{
    61  			name: "empty object",
    62  			input: &models.Object{
    63  				Class: "Car",
    64  			},
    65  			expectedClientCall: "car",
    66  		},
    67  		{
    68  			name: "object with one string prop",
    69  			input: &models.Object{
    70  				Class: "Car",
    71  				Properties: map[string]interface{}{
    72  					"brand": "Mercedes",
    73  				},
    74  			},
    75  			expectedClientCall: "car brand mercedes",
    76  		},
    77  		{
    78  			name: "object with one non-string prop",
    79  			input: &models.Object{
    80  				Class: "Car",
    81  				Properties: map[string]interface{}{
    82  					"power": 300,
    83  				},
    84  			},
    85  			expectedClientCall: "car",
    86  		},
    87  		{
    88  			name: "object with a mix of props",
    89  			input: &models.Object{
    90  				Class: "Car",
    91  				Properties: map[string]interface{}{
    92  					"brand":  "best brand",
    93  					"power":  300,
    94  					"review": "a very great car",
    95  				},
    96  			},
    97  			expectedClientCall: "car brand best brand review a very great car",
    98  		},
    99  		{
   100  			name:    "with a noindexed property",
   101  			noindex: "review",
   102  			input: &models.Object{
   103  				Class: "Car",
   104  				Properties: map[string]interface{}{
   105  					"brand":  "best brand",
   106  					"power":  300,
   107  					"review": "a very great car",
   108  				},
   109  			},
   110  			expectedClientCall: "car brand best brand",
   111  		},
   112  
   113  		{
   114  			name:          "with the class name not vectorized",
   115  			excludedClass: "Car",
   116  			input: &models.Object{
   117  				Class: "Car",
   118  				Properties: map[string]interface{}{
   119  					"brand":  "best brand",
   120  					"power":  300,
   121  					"review": "a very great car",
   122  				},
   123  			},
   124  			expectedClientCall: "brand best brand review a very great car",
   125  		},
   126  		{
   127  			name:             "with a property name not vectorized",
   128  			excludedProperty: "review",
   129  			input: &models.Object{
   130  				Class: "Car",
   131  				Properties: map[string]interface{}{
   132  					"brand":  "best brand",
   133  					"power":  300,
   134  					"review": "a very great car",
   135  				},
   136  			},
   137  			expectedClientCall: "car brand best brand a very great car",
   138  		},
   139  		{
   140  			name:             "with no schema labels vectorized",
   141  			excludedProperty: "review",
   142  			excludedClass:    "Car",
   143  			input: &models.Object{
   144  				Class: "Car",
   145  				Properties: map[string]interface{}{
   146  					"review": "a very great car",
   147  				},
   148  			},
   149  			expectedClientCall: "a very great car",
   150  		},
   151  		{
   152  			name:             "with string/text arrays without propname or classname",
   153  			excludedProperty: "reviews",
   154  			excludedClass:    "Car",
   155  			input: &models.Object{
   156  				Class: "Car",
   157  				Properties: map[string]interface{}{
   158  					"reviews": []string{
   159  						"a very great car",
   160  						"you should consider buying one",
   161  					},
   162  				},
   163  			},
   164  			expectedClientCall: "a very great car you should consider buying one",
   165  		},
   166  		{
   167  			name: "with string/text arrays with propname and classname",
   168  			input: &models.Object{
   169  				Class: "Car",
   170  				Properties: map[string]interface{}{
   171  					"reviews": []string{
   172  						"a very great car",
   173  						"you should consider buying one",
   174  					},
   175  				},
   176  			},
   177  			expectedClientCall: "car reviews a very great car reviews you should consider buying one",
   178  		},
   179  		{
   180  			name: "with compound class and prop names",
   181  			input: &models.Object{
   182  				Class: "SuperCar",
   183  				Properties: map[string]interface{}{
   184  					"brandOfTheCar": "best brand",
   185  					"power":         300,
   186  					"review":        "a very great car",
   187  				},
   188  			},
   189  			expectedClientCall: "super car brand of the car best brand review a very great car",
   190  		},
   191  	}
   192  
   193  	for _, test := range tests {
   194  		t.Run(test.name, func(t *testing.T) {
   195  			client := &fakeClient{}
   196  
   197  			v := New(client)
   198  
   199  			ic := &fakeClassConfig{
   200  				excludedProperty:      test.excludedProperty,
   201  				skippedProperty:       test.noindex,
   202  				vectorizeClassName:    test.excludedClass != "Car",
   203  				vectorizePropertyName: true,
   204  			}
   205  			comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties)
   206  			vector, _, err := v.Object(context.Background(), test.input, comp, ic)
   207  
   208  			require.Nil(t, err)
   209  			assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   210  			expected := strings.Split(test.expectedClientCall, " ")
   211  			actual := strings.Split(client.lastInput, " ")
   212  			assert.Equal(t, expected, actual)
   213  		})
   214  	}
   215  }
   216  
   217  func TestVectorizingObjectsWithDiff(t *testing.T) {
   218  	type testCase struct {
   219  		name              string
   220  		input             *models.Object
   221  		skipped           string
   222  		comp              moduletools.VectorizablePropsComparator
   223  		expectedVectorize bool
   224  	}
   225  
   226  	propsSchema := []*models.Property{
   227  		{
   228  			Name:     "brand",
   229  			DataType: schema.DataTypeText.PropString(),
   230  		},
   231  		{
   232  			Name:     "power",
   233  			DataType: schema.DataTypeInt.PropString(),
   234  		},
   235  		{
   236  			Name:     "description",
   237  			DataType: schema.DataTypeText.PropString(),
   238  		},
   239  		{
   240  			Name:     "reviews",
   241  			DataType: schema.DataTypeTextArray.PropString(),
   242  		},
   243  	}
   244  	props := map[string]interface{}{
   245  		"brand":       "best brand",
   246  		"power":       300,
   247  		"description": "a very great car",
   248  		"reviews": []string{
   249  			"a very great car",
   250  			"you should consider buying one",
   251  		},
   252  	}
   253  	vector := []float32{0, 0, 0, 0}
   254  	var vectors models.Vectors
   255  
   256  	tests := []testCase{
   257  		{
   258  			name: "noop comp",
   259  			input: &models.Object{
   260  				Class:      "Car",
   261  				Properties: props,
   262  			},
   263  			comp:              moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props),
   264  			expectedVectorize: true,
   265  		},
   266  		{
   267  			name: "all props unchanged",
   268  			input: &models.Object{
   269  				Class:      "Car",
   270  				Properties: props,
   271  			},
   272  			comp:              moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors),
   273  			expectedVectorize: false,
   274  		},
   275  		{
   276  			name: "one vectorizable prop changed (1)",
   277  			input: &models.Object{
   278  				Class:      "Car",
   279  				Properties: props,
   280  			},
   281  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   282  				"brand":       "old best brand",
   283  				"power":       300,
   284  				"description": "a very great car",
   285  				"reviews": []string{
   286  					"a very great car",
   287  					"you should consider buying one",
   288  				},
   289  			}, vector, vectors),
   290  			expectedVectorize: true,
   291  		},
   292  		{
   293  			name: "one vectorizable prop changed (2)",
   294  			input: &models.Object{
   295  				Class:      "Car",
   296  				Properties: props,
   297  			},
   298  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   299  				"brand":       "best brand",
   300  				"power":       300,
   301  				"description": "old a very great car",
   302  				"reviews": []string{
   303  					"a very great car",
   304  					"you should consider buying one",
   305  				},
   306  			}, vector, vectors),
   307  			expectedVectorize: true,
   308  		},
   309  		{
   310  			name: "one vectorizable prop changed (3)",
   311  			input: &models.Object{
   312  				Class:      "Car",
   313  				Properties: props,
   314  			},
   315  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   316  				"brand":       "best brand",
   317  				"power":       300,
   318  				"description": "a very great car",
   319  				"reviews": []string{
   320  					"old a very great car",
   321  					"you should consider buying one",
   322  				},
   323  			}, vector, vectors),
   324  			expectedVectorize: true,
   325  		},
   326  		{
   327  			name:    "all non-vectorizable props changed",
   328  			skipped: "description",
   329  			input: &models.Object{
   330  				Class:      "Car",
   331  				Properties: props,
   332  			},
   333  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   334  				"brand":       "best brand",
   335  				"power":       123,
   336  				"description": "old a very great car",
   337  				"reviews": []string{
   338  					"a very great car",
   339  					"you should consider buying one",
   340  				},
   341  			}, vector, vectors),
   342  			expectedVectorize: false,
   343  		},
   344  	}
   345  
   346  	for _, test := range tests {
   347  		t.Run(test.name, func(t *testing.T) {
   348  			ic := &fakeClassConfig{
   349  				skippedProperty: test.skipped,
   350  			}
   351  
   352  			client := &fakeClient{}
   353  			v := New(client)
   354  
   355  			vector, _, err := v.Object(context.Background(), test.input, test.comp, ic)
   356  
   357  			require.Nil(t, err)
   358  			if test.expectedVectorize {
   359  				assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   360  				assert.NotEmpty(t, client.lastInput)
   361  			} else {
   362  				assert.Equal(t, []float32{0, 0, 0, 0}, vector)
   363  				assert.Empty(t, client.lastInput)
   364  			}
   365  		})
   366  	}
   367  }