github.com/weaviate/weaviate@v1.24.6/modules/text2vec-transformers/vectorizer/objects_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"strings"
    17  	"testing"
    18  
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  	"github.com/weaviate/weaviate/entities/models"
    22  	"github.com/weaviate/weaviate/entities/moduletools"
    23  	"github.com/weaviate/weaviate/entities/schema"
    24  )
    25  
    26  // These are mostly copy/pasted (with minimal additions) from the
    27  // text2vec-contextionary module
    28  func TestVectorizingObjects(t *testing.T) {
    29  	type testCase struct {
    30  		name                    string
    31  		input                   *models.Object
    32  		expectedClientCall      string
    33  		expectedPoolingStrategy string
    34  		noindex                 string
    35  		excludedProperty        string // to simulate a schema where property names aren't vectorized
    36  		excludedClass           string // to simulate a schema where class names aren't vectorized
    37  		poolingStrategy         string
    38  	}
    39  
    40  	propsSchema := []*models.Property{
    41  		{
    42  			Name:     "brand",
    43  			DataType: schema.DataTypeText.PropString(),
    44  		},
    45  		{
    46  			Name:     "power",
    47  			DataType: schema.DataTypeInt.PropString(),
    48  		},
    49  		{
    50  			Name:     "review",
    51  			DataType: schema.DataTypeText.PropString(),
    52  		},
    53  		{
    54  			Name:     "brandOfTheCar",
    55  			DataType: schema.DataTypeText.PropString(),
    56  		},
    57  		{
    58  			Name:     "reviews",
    59  			DataType: schema.DataTypeTextArray.PropString(),
    60  		},
    61  	}
    62  
    63  	tests := []testCase{
    64  		{
    65  			name: "empty object",
    66  			input: &models.Object{
    67  				Class: "Car",
    68  			},
    69  			poolingStrategy:         "cls",
    70  			expectedPoolingStrategy: "cls",
    71  			expectedClientCall:      "car",
    72  		},
    73  		{
    74  			name: "object with one string prop",
    75  			input: &models.Object{
    76  				Class: "Car",
    77  				Properties: map[string]interface{}{
    78  					"brand": "Mercedes",
    79  				},
    80  			},
    81  			expectedClientCall: "car brand mercedes",
    82  		},
    83  
    84  		{
    85  			name: "object with one non-string prop",
    86  			input: &models.Object{
    87  				Class: "Car",
    88  				Properties: map[string]interface{}{
    89  					"power": 300,
    90  				},
    91  			},
    92  			expectedClientCall: "car",
    93  		},
    94  
    95  		{
    96  			name: "object with a mix of props",
    97  			input: &models.Object{
    98  				Class: "Car",
    99  				Properties: map[string]interface{}{
   100  					"brand":  "best brand",
   101  					"power":  300,
   102  					"review": "a very great car",
   103  				},
   104  			},
   105  			expectedClientCall: "car brand best brand review a very great car",
   106  		},
   107  		{
   108  			name:    "with a noindexed property",
   109  			noindex: "review",
   110  			input: &models.Object{
   111  				Class: "Car",
   112  				Properties: map[string]interface{}{
   113  					"brand":  "best brand",
   114  					"power":  300,
   115  					"review": "a very great car",
   116  				},
   117  			},
   118  			expectedClientCall: "car brand best brand",
   119  		},
   120  
   121  		{
   122  			name:          "with the class name not vectorized",
   123  			excludedClass: "Car",
   124  			input: &models.Object{
   125  				Class: "Car",
   126  				Properties: map[string]interface{}{
   127  					"brand":  "best brand",
   128  					"power":  300,
   129  					"review": "a very great car",
   130  				},
   131  			},
   132  			expectedClientCall: "brand best brand review a very great car",
   133  		},
   134  
   135  		{
   136  			name:             "with a property name not vectorized",
   137  			excludedProperty: "review",
   138  			input: &models.Object{
   139  				Class: "Car",
   140  				Properties: map[string]interface{}{
   141  					"brand":  "best brand",
   142  					"power":  300,
   143  					"review": "a very great car",
   144  				},
   145  			},
   146  			expectedClientCall: "car brand best brand a very great car",
   147  		},
   148  
   149  		{
   150  			name:             "with no schema labels vectorized",
   151  			excludedProperty: "review",
   152  			excludedClass:    "Car",
   153  			input: &models.Object{
   154  				Class: "Car",
   155  				Properties: map[string]interface{}{
   156  					"review": "a very great car",
   157  				},
   158  			},
   159  			expectedClientCall: "a very great car",
   160  		},
   161  
   162  		{
   163  			name:             "with string/text arrays without propname or classname",
   164  			excludedProperty: "reviews",
   165  			excludedClass:    "Car",
   166  			input: &models.Object{
   167  				Class: "Car",
   168  				Properties: map[string]interface{}{
   169  					"reviews": []string{
   170  						"a very great car",
   171  						"you should consider buying one",
   172  					},
   173  				},
   174  			},
   175  			expectedClientCall: "a very great car you should consider buying one",
   176  		},
   177  
   178  		{
   179  			name: "with string/text arrays with propname and classname",
   180  			input: &models.Object{
   181  				Class: "Car",
   182  				Properties: map[string]interface{}{
   183  					"reviews": []string{
   184  						"a very great car",
   185  						"you should consider buying one",
   186  					},
   187  				},
   188  			},
   189  			expectedClientCall: "car reviews a very great car reviews you should consider buying one",
   190  		},
   191  
   192  		{
   193  			name: "with compound class and prop names",
   194  			input: &models.Object{
   195  				Class: "SuperCar",
   196  				Properties: map[string]interface{}{
   197  					"brandOfTheCar": "best brand",
   198  					"power":         300,
   199  					"review":        "a very great car",
   200  				},
   201  			},
   202  			expectedClientCall: "super car brand of the car best brand review a very great car",
   203  		},
   204  	}
   205  
   206  	for _, test := range tests {
   207  		t.Run(test.name, func(t *testing.T) {
   208  			client := &fakeClient{}
   209  
   210  			v := New(client)
   211  
   212  			ic := &fakeClassConfig{
   213  				excludedProperty:      test.excludedProperty,
   214  				skippedProperty:       test.noindex,
   215  				vectorizeClassName:    test.excludedClass != "Car",
   216  				poolingStrategy:       test.poolingStrategy,
   217  				vectorizePropertyName: true,
   218  			}
   219  			comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties)
   220  			vector, _, err := v.Object(context.Background(), test.input, comp, ic)
   221  
   222  			require.Nil(t, err)
   223  			assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   224  			expected := strings.Split(test.expectedClientCall, " ")
   225  			actual := strings.Split(client.lastInput, " ")
   226  			assert.Equal(t, expected, actual)
   227  			assert.Equal(t, client.lastConfig.PoolingStrategy, test.expectedPoolingStrategy)
   228  		})
   229  	}
   230  }
   231  
   232  func TestVectorizingObjectsWithDiff(t *testing.T) {
   233  	type testCase struct {
   234  		name              string
   235  		input             *models.Object
   236  		skipped           string
   237  		comp              moduletools.VectorizablePropsComparator
   238  		expectedVectorize bool
   239  	}
   240  
   241  	propsSchema := []*models.Property{
   242  		{
   243  			Name:     "brand",
   244  			DataType: schema.DataTypeText.PropString(),
   245  		},
   246  		{
   247  			Name:     "power",
   248  			DataType: schema.DataTypeInt.PropString(),
   249  		},
   250  		{
   251  			Name:     "description",
   252  			DataType: schema.DataTypeText.PropString(),
   253  		},
   254  		{
   255  			Name:     "reviews",
   256  			DataType: schema.DataTypeTextArray.PropString(),
   257  		},
   258  	}
   259  	props := map[string]interface{}{
   260  		"brand":       "best brand",
   261  		"power":       300,
   262  		"description": "a very great car",
   263  		"reviews": []string{
   264  			"a very great car",
   265  			"you should consider buying one",
   266  		},
   267  	}
   268  	vector := []float32{0, 0, 0, 0}
   269  	var vectors models.Vectors
   270  
   271  	tests := []testCase{
   272  		{
   273  			name: "noop comp",
   274  			input: &models.Object{
   275  				Class:      "Car",
   276  				Properties: props,
   277  			},
   278  			comp:              moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props),
   279  			expectedVectorize: true,
   280  		},
   281  		{
   282  			name: "all props unchanged",
   283  			input: &models.Object{
   284  				Class:      "Car",
   285  				Properties: props,
   286  			},
   287  			comp:              moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors),
   288  			expectedVectorize: false,
   289  		},
   290  		{
   291  			name: "one vectorizable prop changed (1)",
   292  			input: &models.Object{
   293  				Class:      "Car",
   294  				Properties: props,
   295  			},
   296  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   297  				"brand":       "old best brand",
   298  				"power":       300,
   299  				"description": "a very great car",
   300  				"reviews": []string{
   301  					"a very great car",
   302  					"you should consider buying one",
   303  				},
   304  			}, vector, vectors),
   305  			expectedVectorize: true,
   306  		},
   307  		{
   308  			name: "one vectorizable prop changed (2)",
   309  			input: &models.Object{
   310  				Class:      "Car",
   311  				Properties: props,
   312  			},
   313  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   314  				"brand":       "best brand",
   315  				"power":       300,
   316  				"description": "old a very great car",
   317  				"reviews": []string{
   318  					"a very great car",
   319  					"you should consider buying one",
   320  				},
   321  			}, vector, vectors),
   322  			expectedVectorize: true,
   323  		},
   324  		{
   325  			name: "one vectorizable prop changed (3)",
   326  			input: &models.Object{
   327  				Class:      "Car",
   328  				Properties: props,
   329  			},
   330  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   331  				"brand":       "best brand",
   332  				"power":       300,
   333  				"description": "a very great car",
   334  				"reviews": []string{
   335  					"old a very great car",
   336  					"you should consider buying one",
   337  				},
   338  			}, vector, vectors),
   339  			expectedVectorize: true,
   340  		},
   341  		{
   342  			name:    "all non-vectorizable props changed",
   343  			skipped: "description",
   344  			input: &models.Object{
   345  				Class:      "Car",
   346  				Properties: props,
   347  			},
   348  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   349  				"brand":       "best brand",
   350  				"power":       123,
   351  				"description": "old a very great car",
   352  				"reviews": []string{
   353  					"a very great car",
   354  					"you should consider buying one",
   355  				},
   356  			}, vector, vectors),
   357  			expectedVectorize: false,
   358  		},
   359  	}
   360  
   361  	for _, test := range tests {
   362  		t.Run(test.name, func(t *testing.T) {
   363  			ic := &fakeClassConfig{
   364  				skippedProperty: test.skipped,
   365  			}
   366  
   367  			client := &fakeClient{}
   368  			v := New(client)
   369  
   370  			vector, _, err := v.Object(context.Background(), test.input, test.comp, ic)
   371  
   372  			require.Nil(t, err)
   373  			if test.expectedVectorize {
   374  				assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   375  				assert.NotEmpty(t, client.lastInput)
   376  			} else {
   377  				assert.Equal(t, []float32{0, 0, 0, 0}, vector)
   378  				assert.Empty(t, client.lastInput)
   379  			}
   380  		})
   381  	}
   382  }