github.com/weaviate/weaviate@v1.24.6/modules/text2vec-huggingface/vectorizer/objects_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"strings"
    17  	"testing"
    18  
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  	"github.com/weaviate/weaviate/entities/models"
    22  	"github.com/weaviate/weaviate/entities/moduletools"
    23  	"github.com/weaviate/weaviate/entities/schema"
    24  )
    25  
    26  // These are mostly copy/pasted (with minimal additions) from the
    27  // text2vec-contextionary module
    28  func TestVectorizingObjects(t *testing.T) {
    29  	type testCase struct {
    30  		name                     string
    31  		input                    *models.Object
    32  		expectedClientCall       string
    33  		expectedHuggingFaceModel string
    34  		noindex                  string
    35  		excludedProperty         string // to simulate a schema where property names aren't vectorized
    36  		excludedClass            string // to simulate a schema where class names aren't vectorized
    37  		passageModel             string
    38  		endpointURL              string
    39  	}
    40  
    41  	propsSchema := []*models.Property{
    42  		{
    43  			Name:     "brand",
    44  			DataType: schema.DataTypeText.PropString(),
    45  		},
    46  		{
    47  			Name:     "power",
    48  			DataType: schema.DataTypeInt.PropString(),
    49  		},
    50  		{
    51  			Name:     "review",
    52  			DataType: schema.DataTypeText.PropString(),
    53  		},
    54  		{
    55  			Name:     "brandOfTheCar",
    56  			DataType: schema.DataTypeText.PropString(),
    57  		},
    58  		{
    59  			Name:     "reviews",
    60  			DataType: schema.DataTypeTextArray.PropString(),
    61  		},
    62  	}
    63  
    64  	tests := []testCase{
    65  		{
    66  			name: "empty object",
    67  			input: &models.Object{
    68  				Class: "Car",
    69  			},
    70  			passageModel:             "sentence-transformers/gtr-t5-xl",
    71  			expectedHuggingFaceModel: "sentence-transformers/gtr-t5-xl",
    72  			expectedClientCall:       "car",
    73  		},
    74  		{
    75  			name: "object with one string prop",
    76  			input: &models.Object{
    77  				Class: "Car",
    78  				Properties: map[string]interface{}{
    79  					"brand": "Mercedes",
    80  				},
    81  			},
    82  			expectedClientCall: "car brand mercedes",
    83  		},
    84  		{
    85  			name: "object with one non-string prop",
    86  			input: &models.Object{
    87  				Class: "Car",
    88  				Properties: map[string]interface{}{
    89  					"power": 300,
    90  				},
    91  			},
    92  			expectedClientCall: "car",
    93  		},
    94  		{
    95  			name: "object with a mix of props",
    96  			input: &models.Object{
    97  				Class: "Car",
    98  				Properties: map[string]interface{}{
    99  					"brand":  "best brand",
   100  					"power":  300,
   101  					"review": "a very great car",
   102  				},
   103  			},
   104  			expectedClientCall: "car brand best brand review a very great car",
   105  		},
   106  		{
   107  			name:    "with a noindexed property",
   108  			noindex: "review",
   109  			input: &models.Object{
   110  				Class: "Car",
   111  				Properties: map[string]interface{}{
   112  					"brand":  "best brand",
   113  					"power":  300,
   114  					"review": "a very great car",
   115  				},
   116  			},
   117  			expectedClientCall: "car brand best brand",
   118  		},
   119  		{
   120  			name:          "with the class name not vectorized",
   121  			excludedClass: "Car",
   122  			input: &models.Object{
   123  				Class: "Car",
   124  				Properties: map[string]interface{}{
   125  					"brand":  "best brand",
   126  					"power":  300,
   127  					"review": "a very great car",
   128  				},
   129  			},
   130  			expectedClientCall: "brand best brand review a very great car",
   131  		},
   132  		{
   133  			name:             "with a property name not vectorized",
   134  			excludedProperty: "review",
   135  			input: &models.Object{
   136  				Class: "Car",
   137  				Properties: map[string]interface{}{
   138  					"brand":  "best brand",
   139  					"power":  300,
   140  					"review": "a very great car",
   141  				},
   142  			},
   143  			expectedClientCall: "car brand best brand a very great car",
   144  		},
   145  		{
   146  			name:             "with no schema labels vectorized",
   147  			excludedProperty: "review",
   148  			excludedClass:    "Car",
   149  			input: &models.Object{
   150  				Class: "Car",
   151  				Properties: map[string]interface{}{
   152  					"review": "a very great car",
   153  				},
   154  			},
   155  			expectedClientCall: "a very great car",
   156  		},
   157  		{
   158  			name:             "with string/text arrays without propname or classname",
   159  			excludedProperty: "reviews",
   160  			excludedClass:    "Car",
   161  			input: &models.Object{
   162  				Class: "Car",
   163  				Properties: map[string]interface{}{
   164  					"reviews": []string{
   165  						"a very great car",
   166  						"you should consider buying one",
   167  					},
   168  				},
   169  			},
   170  			expectedClientCall: "a very great car you should consider buying one",
   171  		},
   172  		{
   173  			name: "with string/text arrays with propname and classname",
   174  			input: &models.Object{
   175  				Class: "Car",
   176  				Properties: map[string]interface{}{
   177  					"reviews": []string{
   178  						"a very great car",
   179  						"you should consider buying one",
   180  					},
   181  				},
   182  			},
   183  			expectedClientCall: "car reviews a very great car reviews you should consider buying one",
   184  		},
   185  		{
   186  			name: "with compound class and prop names",
   187  			input: &models.Object{
   188  				Class: "SuperCar",
   189  				Properties: map[string]interface{}{
   190  					"brandOfTheCar": "best brand",
   191  					"power":         300,
   192  					"review":        "a very great car",
   193  				},
   194  			},
   195  			expectedClientCall: "super car brand of the car best brand review a very great car",
   196  		},
   197  		{
   198  			name: "empty object with HF Inference Endpoint",
   199  			input: &models.Object{
   200  				Class: "Car",
   201  			},
   202  			endpointURL:        "https://url.cloud",
   203  			expectedClientCall: "car",
   204  		},
   205  	}
   206  
   207  	for _, test := range tests {
   208  		t.Run(test.name, func(t *testing.T) {
   209  			client := &fakeClient{}
   210  
   211  			v := New(client)
   212  
   213  			ic := &fakeClassConfig{
   214  				excludedProperty:      test.excludedProperty,
   215  				skippedProperty:       test.noindex,
   216  				vectorizeClassName:    test.excludedClass != "Car",
   217  				passageModel:          test.passageModel,
   218  				endpointURL:           test.endpointURL,
   219  				vectorizePropertyName: true,
   220  			}
   221  			comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties)
   222  			vector, _, err := v.Object(context.Background(), test.input, comp, ic)
   223  
   224  			require.Nil(t, err)
   225  			assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   226  			expected := strings.Split(test.expectedClientCall, " ")
   227  			actual := strings.Split(client.lastInput, " ")
   228  			assert.Equal(t, expected, actual)
   229  			if test.expectedHuggingFaceModel != "" {
   230  				assert.Equal(t, test.expectedHuggingFaceModel, client.lastConfig.Model)
   231  			}
   232  		})
   233  	}
   234  }
   235  
   236  func TestVectorizingObjectsWithDiff(t *testing.T) {
   237  	type testCase struct {
   238  		name              string
   239  		input             *models.Object
   240  		skipped           string
   241  		comp              moduletools.VectorizablePropsComparator
   242  		expectedVectorize bool
   243  	}
   244  
   245  	propsSchema := []*models.Property{
   246  		{
   247  			Name:     "brand",
   248  			DataType: schema.DataTypeText.PropString(),
   249  		},
   250  		{
   251  			Name:     "power",
   252  			DataType: schema.DataTypeInt.PropString(),
   253  		},
   254  		{
   255  			Name:     "description",
   256  			DataType: schema.DataTypeText.PropString(),
   257  		},
   258  		{
   259  			Name:     "reviews",
   260  			DataType: schema.DataTypeTextArray.PropString(),
   261  		},
   262  	}
   263  	props := map[string]interface{}{
   264  		"brand":       "best brand",
   265  		"power":       300,
   266  		"description": "a very great car",
   267  		"reviews": []string{
   268  			"a very great car",
   269  			"you should consider buying one",
   270  		},
   271  	}
   272  	vector := []float32{0, 0, 0, 0}
   273  	var vectors models.Vectors
   274  
   275  	tests := []testCase{
   276  		{
   277  			name: "noop comp",
   278  			input: &models.Object{
   279  				Class:      "Car",
   280  				Properties: props,
   281  			},
   282  			comp:              moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props),
   283  			expectedVectorize: true,
   284  		},
   285  		{
   286  			name: "all props unchanged",
   287  			input: &models.Object{
   288  				Class:      "Car",
   289  				Properties: props,
   290  			},
   291  			comp:              moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors),
   292  			expectedVectorize: false,
   293  		},
   294  		{
   295  			name: "one vectorizable prop changed (1)",
   296  			input: &models.Object{
   297  				Class:      "Car",
   298  				Properties: props,
   299  			},
   300  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   301  				"brand":       "old best brand",
   302  				"power":       300,
   303  				"description": "a very great car",
   304  				"reviews": []string{
   305  					"a very great car",
   306  					"you should consider buying one",
   307  				},
   308  			}, vector, vectors),
   309  			expectedVectorize: true,
   310  		},
   311  		{
   312  			name: "one vectorizable prop changed (2)",
   313  			input: &models.Object{
   314  				Class:      "Car",
   315  				Properties: props,
   316  			},
   317  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   318  				"brand":       "best brand",
   319  				"power":       300,
   320  				"description": "old a very great car",
   321  				"reviews": []string{
   322  					"a very great car",
   323  					"you should consider buying one",
   324  				},
   325  			}, vector, vectors),
   326  			expectedVectorize: true,
   327  		},
   328  		{
   329  			name: "one vectorizable prop changed (3)",
   330  			input: &models.Object{
   331  				Class:      "Car",
   332  				Properties: props,
   333  			},
   334  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   335  				"brand":       "best brand",
   336  				"power":       300,
   337  				"description": "a very great car",
   338  				"reviews": []string{
   339  					"old a very great car",
   340  					"you should consider buying one",
   341  				},
   342  			}, vector, vectors),
   343  			expectedVectorize: true,
   344  		},
   345  		{
   346  			name:    "all non-vectorizable props changed",
   347  			skipped: "description",
   348  			input: &models.Object{
   349  				Class:      "Car",
   350  				Properties: props,
   351  			},
   352  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   353  				"brand":       "best brand",
   354  				"power":       123,
   355  				"description": "old a very great car",
   356  				"reviews": []string{
   357  					"a very great car",
   358  					"you should consider buying one",
   359  				},
   360  			}, vector, vectors),
   361  			expectedVectorize: false,
   362  		},
   363  	}
   364  
   365  	for _, test := range tests {
   366  		t.Run(test.name, func(t *testing.T) {
   367  			ic := &fakeClassConfig{
   368  				skippedProperty: test.skipped,
   369  			}
   370  
   371  			client := &fakeClient{}
   372  			v := New(client)
   373  
   374  			vector, _, err := v.Object(context.Background(), test.input, test.comp, ic)
   375  
   376  			require.Nil(t, err)
   377  			if test.expectedVectorize {
   378  				assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   379  				assert.NotEmpty(t, client.lastInput)
   380  			} else {
   381  				assert.Equal(t, []float32{0, 0, 0, 0}, vector)
   382  				assert.Empty(t, client.lastInput)
   383  			}
   384  		})
   385  	}
   386  }