github.com/weaviate/weaviate@v1.24.6/modules/text2vec-aws/vectorizer/objects_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"strings"
    17  	"testing"
    18  
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  	"github.com/weaviate/weaviate/entities/models"
    22  	"github.com/weaviate/weaviate/entities/moduletools"
    23  	"github.com/weaviate/weaviate/entities/schema"
    24  )
    25  
    26  // These are mostly copy/pasted (with minimal additions) from the
    27  // text2vec-contextionary module
    28  func TestVectorizingObjects(t *testing.T) {
    29  	type testCase struct {
    30  		name               string
    31  		input              *models.Object
    32  		expectedClientCall string
    33  		noindex            string
    34  		excludedProperty   string // to simulate a schema where property names aren't vectorized
    35  		excludedClass      string // to simulate a schema where class names aren't vectorized
    36  		awsModel           string
    37  	}
    38  
    39  	propsSchema := []*models.Property{
    40  		{
    41  			Name:     "brand",
    42  			DataType: schema.DataTypeText.PropString(),
    43  		},
    44  		{
    45  			Name:     "power",
    46  			DataType: schema.DataTypeInt.PropString(),
    47  		},
    48  		{
    49  			Name:     "review",
    50  			DataType: schema.DataTypeText.PropString(),
    51  		},
    52  		{
    53  			Name:     "brandOfTheCar",
    54  			DataType: schema.DataTypeText.PropString(),
    55  		},
    56  		{
    57  			Name:     "reviews",
    58  			DataType: schema.DataTypeTextArray.PropString(),
    59  		},
    60  	}
    61  
    62  	tests := []testCase{
    63  		{
    64  			name: "empty object",
    65  			input: &models.Object{
    66  				Class: "Car",
    67  			},
    68  			awsModel:           "large",
    69  			expectedClientCall: "car",
    70  		},
    71  		{
    72  			name: "object with one string prop",
    73  			input: &models.Object{
    74  				Class: "Car",
    75  				Properties: map[string]interface{}{
    76  					"brand": "Mercedes",
    77  				},
    78  			},
    79  			expectedClientCall: "car brand mercedes",
    80  		},
    81  		{
    82  			name: "object with one non-string prop",
    83  			input: &models.Object{
    84  				Class: "Car",
    85  				Properties: map[string]interface{}{
    86  					"power": 300,
    87  				},
    88  			},
    89  			expectedClientCall: "car",
    90  		},
    91  		{
    92  			name: "object with a mix of props",
    93  			input: &models.Object{
    94  				Class: "Car",
    95  				Properties: map[string]interface{}{
    96  					"brand":  "best brand",
    97  					"power":  300,
    98  					"review": "a very great car",
    99  				},
   100  			},
   101  			expectedClientCall: "car brand best brand review a very great car",
   102  		},
   103  		{
   104  			name:    "with a noindexed property",
   105  			noindex: "review",
   106  			input: &models.Object{
   107  				Class: "Car",
   108  				Properties: map[string]interface{}{
   109  					"brand":  "best brand",
   110  					"power":  300,
   111  					"review": "a very great car",
   112  				},
   113  			},
   114  			expectedClientCall: "car brand best brand",
   115  		},
   116  		{
   117  			name:          "with the class name not vectorized",
   118  			excludedClass: "Car",
   119  			input: &models.Object{
   120  				Class: "Car",
   121  				Properties: map[string]interface{}{
   122  					"brand":  "best brand",
   123  					"power":  300,
   124  					"review": "a very great car",
   125  				},
   126  			},
   127  			expectedClientCall: "brand best brand review a very great car",
   128  		},
   129  		{
   130  			name:             "with a property name not vectorized",
   131  			excludedProperty: "review",
   132  			input: &models.Object{
   133  				Class: "Car",
   134  				Properties: map[string]interface{}{
   135  					"brand":  "best brand",
   136  					"power":  300,
   137  					"review": "a very great car",
   138  				},
   139  			},
   140  			expectedClientCall: "car brand best brand a very great car",
   141  		},
   142  		{
   143  			name:             "with no schema labels vectorized",
   144  			excludedProperty: "review",
   145  			excludedClass:    "Car",
   146  			input: &models.Object{
   147  				Class: "Car",
   148  				Properties: map[string]interface{}{
   149  					"review": "a very great car",
   150  				},
   151  			},
   152  			expectedClientCall: "a very great car",
   153  		},
   154  		{
   155  			name:             "with string/text arrays without propname or classname",
   156  			excludedProperty: "reviews",
   157  			excludedClass:    "Car",
   158  			input: &models.Object{
   159  				Class: "Car",
   160  				Properties: map[string]interface{}{
   161  					"reviews": []string{
   162  						"a very great car",
   163  						"you should consider buying one",
   164  					},
   165  				},
   166  			},
   167  			expectedClientCall: "a very great car you should consider buying one",
   168  		},
   169  		{
   170  			name: "with string/text arrays with propname and classname",
   171  			input: &models.Object{
   172  				Class: "Car",
   173  				Properties: map[string]interface{}{
   174  					"reviews": []string{
   175  						"a very great car",
   176  						"you should consider buying one",
   177  					},
   178  				},
   179  			},
   180  			expectedClientCall: "car reviews a very great car reviews you should consider buying one",
   181  		},
   182  		{
   183  			name: "with compound class and prop names",
   184  			input: &models.Object{
   185  				Class: "SuperCar",
   186  				Properties: map[string]interface{}{
   187  					"brandOfTheCar": "best brand",
   188  					"power":         300,
   189  					"review":        "a very great car",
   190  				},
   191  			},
   192  			expectedClientCall: "super car brand of the car best brand review a very great car",
   193  		},
   194  	}
   195  
   196  	for _, test := range tests {
   197  		t.Run(test.name, func(t *testing.T) {
   198  			client := &fakeClient{}
   199  
   200  			v := New(client)
   201  
   202  			ic := &fakeClassConfig{
   203  				skippedProperty:       test.noindex,
   204  				vectorizeClassName:    test.excludedClass != "Car",
   205  				excludedProperty:      test.excludedProperty,
   206  				service:               "",
   207  				region:                "",
   208  				model:                 "",
   209  				endpoint:              "",
   210  				targetModel:           "",
   211  				targetVariant:         "",
   212  				vectorizePropertyName: true,
   213  			}
   214  			comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties)
   215  			vector, _, err := v.Object(context.Background(), test.input, comp, ic)
   216  
   217  			require.Nil(t, err)
   218  			assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   219  			expected := strings.Split(test.expectedClientCall, " ")
   220  			actual := strings.Split(client.lastInput[0], " ")
   221  			assert.Equal(t, expected, actual)
   222  		})
   223  	}
   224  }
   225  
   226  func TestVectorizingObjectsWithDiff(t *testing.T) {
   227  	type testCase struct {
   228  		name              string
   229  		input             *models.Object
   230  		skipped           string
   231  		comp              moduletools.VectorizablePropsComparator
   232  		expectedVectorize bool
   233  	}
   234  
   235  	propsSchema := []*models.Property{
   236  		{
   237  			Name:     "brand",
   238  			DataType: schema.DataTypeText.PropString(),
   239  		},
   240  		{
   241  			Name:     "power",
   242  			DataType: schema.DataTypeInt.PropString(),
   243  		},
   244  		{
   245  			Name:     "description",
   246  			DataType: schema.DataTypeText.PropString(),
   247  		},
   248  		{
   249  			Name:     "reviews",
   250  			DataType: schema.DataTypeTextArray.PropString(),
   251  		},
   252  	}
   253  	props := map[string]interface{}{
   254  		"brand":       "best brand",
   255  		"power":       300,
   256  		"description": "a very great car",
   257  		"reviews": []string{
   258  			"a very great car",
   259  			"you should consider buying one",
   260  		},
   261  	}
   262  	vector := []float32{0, 0, 0, 0}
   263  	var vectors models.Vectors
   264  
   265  	tests := []testCase{
   266  		{
   267  			name: "noop comp",
   268  			input: &models.Object{
   269  				Class:      "Car",
   270  				Properties: props,
   271  			},
   272  			comp:              moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props),
   273  			expectedVectorize: true,
   274  		},
   275  		{
   276  			name: "all props unchanged",
   277  			input: &models.Object{
   278  				Class:      "Car",
   279  				Properties: props,
   280  			},
   281  			comp:              moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors),
   282  			expectedVectorize: false,
   283  		},
   284  		{
   285  			name: "one vectorizable prop changed (1)",
   286  			input: &models.Object{
   287  				Class:      "Car",
   288  				Properties: props,
   289  			},
   290  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   291  				"brand":       "old best brand",
   292  				"power":       300,
   293  				"description": "a very great car",
   294  				"reviews": []string{
   295  					"a very great car",
   296  					"you should consider buying one",
   297  				},
   298  			}, vector, vectors),
   299  			expectedVectorize: true,
   300  		},
   301  		{
   302  			name: "one vectorizable prop changed (2)",
   303  			input: &models.Object{
   304  				Class:      "Car",
   305  				Properties: props,
   306  			},
   307  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   308  				"brand":       "best brand",
   309  				"power":       300,
   310  				"description": "old a very great car",
   311  				"reviews": []string{
   312  					"a very great car",
   313  					"you should consider buying one",
   314  				},
   315  			}, vector, vectors),
   316  			expectedVectorize: true,
   317  		},
   318  		{
   319  			name: "one vectorizable prop changed (3)",
   320  			input: &models.Object{
   321  				Class:      "Car",
   322  				Properties: props,
   323  			},
   324  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   325  				"brand":       "best brand",
   326  				"power":       300,
   327  				"description": "a very great car",
   328  				"reviews": []string{
   329  					"old a very great car",
   330  					"you should consider buying one",
   331  				},
   332  			}, vector, vectors),
   333  			expectedVectorize: true,
   334  		},
   335  		{
   336  			name:    "all non-vectorizable props changed",
   337  			skipped: "description",
   338  			input: &models.Object{
   339  				Class:      "Car",
   340  				Properties: props,
   341  			},
   342  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   343  				"brand":       "best brand",
   344  				"power":       123,
   345  				"description": "old a very great car",
   346  				"reviews": []string{
   347  					"a very great car",
   348  					"you should consider buying one",
   349  				},
   350  			}, vector, vectors),
   351  			expectedVectorize: false,
   352  		},
   353  	}
   354  
   355  	for _, test := range tests {
   356  		t.Run(test.name, func(t *testing.T) {
   357  			ic := &fakeClassConfig{
   358  				skippedProperty: test.skipped,
   359  			}
   360  
   361  			client := &fakeClient{}
   362  			v := New(client)
   363  
   364  			vector, _, err := v.Object(context.Background(), test.input, test.comp, ic)
   365  
   366  			require.Nil(t, err)
   367  			if test.expectedVectorize {
   368  				assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   369  				assert.NotEmpty(t, client.lastInput)
   370  			} else {
   371  				assert.Equal(t, []float32{0, 0, 0, 0}, vector)
   372  				assert.Empty(t, client.lastInput)
   373  			}
   374  		})
   375  	}
   376  }