github.com/weaviate/weaviate@v1.24.6/modules/text2vec-openai/vectorizer/objects_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"testing"
    18  
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  	"github.com/weaviate/weaviate/entities/models"
    22  	"github.com/weaviate/weaviate/entities/moduletools"
    23  	"github.com/weaviate/weaviate/entities/schema"
    24  )
    25  
    26  // These are mostly copy/pasted (with minimal additions) from the
    27  // text2vec-contextionary module
    28  func TestVectorizingObjects(t *testing.T) {
    29  	type testCase struct {
    30  		name                string
    31  		input               *models.Object
    32  		expectedClientCall  string
    33  		expectedOpenAIType  string
    34  		expectedOpenAIModel string
    35  		noindex             string
    36  		excludedProperty    string // to simulate a schema where property names aren't vectorized
    37  		excludedClass       string // to simulate a schema where class names aren't vectorized
    38  		openAIType          string
    39  		openAIModel         string
    40  		openAIModelVersion  string
    41  	}
    42  
    43  	propsSchema := []*models.Property{
    44  		{
    45  			Name:     "brand",
    46  			DataType: schema.DataTypeText.PropString(),
    47  		},
    48  		{
    49  			Name:     "power",
    50  			DataType: schema.DataTypeInt.PropString(),
    51  		},
    52  		{
    53  			Name:     "review",
    54  			DataType: schema.DataTypeText.PropString(),
    55  		},
    56  		{
    57  			Name:     "brandOfTheCar",
    58  			DataType: schema.DataTypeText.PropString(),
    59  		},
    60  		{
    61  			Name:     "reviews",
    62  			DataType: schema.DataTypeTextArray.PropString(),
    63  		},
    64  	}
    65  
    66  	tests := []testCase{
    67  		{
    68  			name: "empty object",
    69  			input: &models.Object{
    70  				Class: "Car",
    71  			},
    72  			openAIType:          "text",
    73  			openAIModel:         "ada",
    74  			expectedOpenAIType:  "text",
    75  			expectedOpenAIModel: "ada",
    76  			expectedClientCall:  "car",
    77  		},
    78  		{
    79  			name: "object with one string prop",
    80  			input: &models.Object{
    81  				Class: "Car",
    82  				Properties: map[string]interface{}{
    83  					"brand": "Mercedes",
    84  				},
    85  			},
    86  			expectedClientCall: "car brand mercedes",
    87  		},
    88  		{
    89  			name: "object with one non-string prop",
    90  			input: &models.Object{
    91  				Class: "Car",
    92  				Properties: map[string]interface{}{
    93  					"power": 300,
    94  				},
    95  			},
    96  			expectedClientCall: "car",
    97  		},
    98  		{
    99  			name: "object with a mix of props",
   100  			input: &models.Object{
   101  				Class: "Car",
   102  				Properties: map[string]interface{}{
   103  					"brand":  "best brand",
   104  					"power":  300,
   105  					"review": "a very great car",
   106  				},
   107  			},
   108  			expectedClientCall: "car brand best brand review a very great car",
   109  		},
   110  		{
   111  			name:    "with a noindexed property",
   112  			noindex: "review",
   113  			input: &models.Object{
   114  				Class: "Car",
   115  				Properties: map[string]interface{}{
   116  					"brand":  "best brand",
   117  					"power":  300,
   118  					"review": "a very great car",
   119  				},
   120  			},
   121  			expectedClientCall: "car brand best brand",
   122  		},
   123  		{
   124  			name:          "with the class name not vectorized",
   125  			excludedClass: "Car",
   126  			input: &models.Object{
   127  				Class: "Car",
   128  				Properties: map[string]interface{}{
   129  					"brand":  "best brand",
   130  					"power":  300,
   131  					"review": "a very great car",
   132  				},
   133  			},
   134  			expectedClientCall: "brand best brand review a very great car",
   135  		},
   136  		{
   137  			name:             "with a property name not vectorized",
   138  			excludedProperty: "review",
   139  			input: &models.Object{
   140  				Class: "Car",
   141  				Properties: map[string]interface{}{
   142  					"brand":  "best brand",
   143  					"power":  300,
   144  					"review": "a very great car",
   145  				},
   146  			},
   147  			expectedClientCall: "car brand best brand a very great car",
   148  		},
   149  		{
   150  			name:             "with no schema labels vectorized",
   151  			excludedProperty: "review",
   152  			excludedClass:    "Car",
   153  			input: &models.Object{
   154  				Class: "Car",
   155  				Properties: map[string]interface{}{
   156  					"review": "a very great car",
   157  				},
   158  			},
   159  			expectedClientCall: "a very great car",
   160  		},
   161  		{
   162  			name:             "with string/text arrays without propname or classname",
   163  			excludedProperty: "reviews",
   164  			excludedClass:    "Car",
   165  			input: &models.Object{
   166  				Class: "Car",
   167  				Properties: map[string]interface{}{
   168  					"reviews": []string{
   169  						"a very great car",
   170  						"you should consider buying one",
   171  					},
   172  				},
   173  			},
   174  			expectedClientCall: "a very great car you should consider buying one",
   175  		},
   176  		{
   177  			name: "with string/text arrays with propname and classname",
   178  			input: &models.Object{
   179  				Class: "Car",
   180  				Properties: map[string]interface{}{
   181  					"reviews": []string{
   182  						"a very great car",
   183  						"you should consider buying one",
   184  					},
   185  				},
   186  			},
   187  			expectedClientCall: "car reviews a very great car reviews you should consider buying one",
   188  		},
   189  		{
   190  			name: "with compound class and prop names",
   191  			input: &models.Object{
   192  				Class: "SuperCar",
   193  				Properties: map[string]interface{}{
   194  					"brandOfTheCar": "best brand",
   195  					"power":         300,
   196  					"review":        "a very great car",
   197  				},
   198  			},
   199  			expectedClientCall: "super car brand of the car best brand review a very great car",
   200  		},
   201  	}
   202  
   203  	for _, test := range tests {
   204  		t.Run(test.name, func(t *testing.T) {
   205  			client := &fakeClient{}
   206  
   207  			v := New(client)
   208  
   209  			cfg := &fakeClassConfig{
   210  				classConfig: map[string]interface{}{
   211  					"vectorizeClassName": test.excludedClass != "Car",
   212  					"type":               test.openAIType,
   213  					"model":              test.openAIModel,
   214  					"modelVersion":       test.openAIModelVersion,
   215  				},
   216  				vectorizePropertyName: true,
   217  				skippedProperty:       test.noindex,
   218  				excludedProperty:      test.excludedProperty,
   219  			}
   220  			comp := moduletools.NewVectorizablePropsComparatorDummy(propsSchema, test.input.Properties)
   221  			vector, _, err := v.Object(context.Background(), test.input, comp, cfg)
   222  
   223  			require.Nil(t, err)
   224  			assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   225  			assert.Equal(t, []string{test.expectedClientCall}, client.lastInput)
   226  			assert.Equal(t, test.expectedOpenAIType, client.lastConfig.Type)
   227  			assert.Equal(t, test.expectedOpenAIModel, client.lastConfig.Model)
   228  		})
   229  	}
   230  }
   231  
   232  func TestClassSettings(t *testing.T) {
   233  	type testCase struct {
   234  		expectedBaseURL string
   235  		cfg             moduletools.ClassConfig
   236  	}
   237  	tests := []testCase{
   238  		{
   239  			cfg: fakeClassConfig{
   240  				classConfig: make(map[string]interface{}),
   241  			},
   242  			expectedBaseURL: DefaultBaseURL,
   243  		},
   244  		{
   245  			cfg: fakeClassConfig{
   246  				classConfig: map[string]interface{}{
   247  					"baseURL": "https://proxy.weaviate.dev",
   248  				},
   249  			},
   250  			expectedBaseURL: "https://proxy.weaviate.dev",
   251  		},
   252  	}
   253  
   254  	for _, tt := range tests {
   255  		ic := NewClassSettings(tt.cfg)
   256  		assert.Equal(t, tt.expectedBaseURL, ic.BaseURL())
   257  	}
   258  }
   259  
   260  func TestVectorizingObjectWithDiff(t *testing.T) {
   261  	type testCase struct {
   262  		name              string
   263  		input             *models.Object
   264  		skipped           string
   265  		comp              moduletools.VectorizablePropsComparator
   266  		expectedVectorize bool
   267  	}
   268  
   269  	propsSchema := []*models.Property{
   270  		{
   271  			Name:     "brand",
   272  			DataType: schema.DataTypeText.PropString(),
   273  		},
   274  		{
   275  			Name:     "power",
   276  			DataType: schema.DataTypeInt.PropString(),
   277  		},
   278  		{
   279  			Name:     "description",
   280  			DataType: schema.DataTypeText.PropString(),
   281  		},
   282  		{
   283  			Name:     "reviews",
   284  			DataType: schema.DataTypeTextArray.PropString(),
   285  		},
   286  	}
   287  	props := map[string]interface{}{
   288  		"brand":       "best brand",
   289  		"power":       300,
   290  		"description": "a very great car",
   291  		"reviews": []string{
   292  			"a very great car",
   293  			"you should consider buying one",
   294  		},
   295  	}
   296  	vector := []float32{0, 0, 0, 0}
   297  	var vectors models.Vectors
   298  
   299  	tests := []testCase{
   300  		{
   301  			name: "noop comp",
   302  			input: &models.Object{
   303  				Class:      "Car",
   304  				Properties: props,
   305  			},
   306  			comp:              moduletools.NewVectorizablePropsComparatorDummy(propsSchema, props),
   307  			expectedVectorize: true,
   308  		},
   309  		{
   310  			name: "all props unchanged",
   311  			input: &models.Object{
   312  				Class:      "Car",
   313  				Properties: props,
   314  			},
   315  			comp:              moduletools.NewVectorizablePropsComparator(propsSchema, props, props, vector, vectors),
   316  			expectedVectorize: false,
   317  		},
   318  		{
   319  			name: "one vectorizable prop changed (1)",
   320  			input: &models.Object{
   321  				Class:      "Car",
   322  				Properties: props,
   323  			},
   324  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   325  				"brand":       "old best brand",
   326  				"power":       300,
   327  				"description": "a very great car",
   328  				"reviews": []string{
   329  					"a very great car",
   330  					"you should consider buying one",
   331  				},
   332  			}, vector, vectors),
   333  			expectedVectorize: true,
   334  		},
   335  		{
   336  			name: "one vectorizable prop changed (2)",
   337  			input: &models.Object{
   338  				Class:      "Car",
   339  				Properties: props,
   340  			},
   341  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   342  				"brand":       "best brand",
   343  				"power":       300,
   344  				"description": "old a very great car",
   345  				"reviews": []string{
   346  					"a very great car",
   347  					"you should consider buying one",
   348  				},
   349  			}, vector, vectors),
   350  			expectedVectorize: true,
   351  		},
   352  		{
   353  			name: "one vectorizable prop changed (3)",
   354  			input: &models.Object{
   355  				Class:      "Car",
   356  				Properties: props,
   357  			},
   358  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   359  				"brand":       "best brand",
   360  				"power":       300,
   361  				"description": "a very great car",
   362  				"reviews": []string{
   363  					"old a very great car",
   364  					"you should consider buying one",
   365  				},
   366  			}, vector, vectors),
   367  			expectedVectorize: true,
   368  		},
   369  		{
   370  			name:    "all non-vectorizable props changed",
   371  			skipped: "description",
   372  			input: &models.Object{
   373  				Class:      "Car",
   374  				Properties: props,
   375  			},
   376  			comp: moduletools.NewVectorizablePropsComparator(propsSchema, props, map[string]interface{}{
   377  				"brand":       "best brand",
   378  				"power":       123,
   379  				"description": "old a very great car",
   380  				"reviews": []string{
   381  					"a very great car",
   382  					"you should consider buying one",
   383  				},
   384  			}, vector, vectors),
   385  			expectedVectorize: false,
   386  		},
   387  	}
   388  
   389  	for _, test := range tests {
   390  		t.Run(test.name, func(t *testing.T) {
   391  			cfg := &fakeClassConfig{
   392  				skippedProperty: test.skipped,
   393  			}
   394  
   395  			client := &fakeClient{}
   396  			v := New(client)
   397  
   398  			vector, _, err := v.Object(context.Background(), test.input, test.comp, cfg)
   399  
   400  			require.Nil(t, err)
   401  			if test.expectedVectorize {
   402  				assert.Equal(t, []float32{0, 1, 2, 3}, vector)
   403  				assert.NotEmpty(t, client.lastInput)
   404  			} else {
   405  				assert.Equal(t, []float32{0, 0, 0, 0}, vector)
   406  				assert.Empty(t, client.lastInput)
   407  			}
   408  		})
   409  	}
   410  }
   411  
   412  func TestValidateModelVersion(t *testing.T) {
   413  	type test struct {
   414  		model    string
   415  		docType  string
   416  		version  string
   417  		possible bool
   418  	}
   419  
   420  	tests := []test{
   421  		// 001 models
   422  		{"ada", "text", "001", true},
   423  		{"ada", "code", "001", true},
   424  		{"babbage", "text", "001", true},
   425  		{"babbage", "code", "001", true},
   426  		{"curie", "text", "001", true},
   427  		{"curie", "code", "001", true},
   428  		{"davinci", "text", "001", true},
   429  		{"davinci", "code", "001", true},
   430  
   431  		// 002 models
   432  		{"ada", "text", "002", true},
   433  		{"davinci", "text", "002", true},
   434  		{"ada", "code", "002", false},
   435  		{"babbage", "text", "002", false},
   436  		{"babbage", "code", "002", false},
   437  		{"curie", "text", "002", false},
   438  		{"curie", "code", "002", false},
   439  		{"davinci", "code", "002", false},
   440  
   441  		// 003
   442  		{"davinci", "text", "003", true},
   443  		{"ada", "text", "003", false},
   444  		{"babbage", "text", "003", false},
   445  
   446  		// 004
   447  		{"davinci", "text", "004", false},
   448  		{"ada", "text", "004", false},
   449  		{"babbage", "text", "004", false},
   450  	}
   451  
   452  	for _, test := range tests {
   453  		name := fmt.Sprintf("model=%s docType=%s version=%s", test.model, test.docType, test.version)
   454  		t.Run(name, func(t *testing.T) {
   455  			err := (&classSettings{}).validateModelVersion(test.version, test.model, test.docType)
   456  			if test.possible {
   457  				assert.Nil(t, err, "this combination should be possible")
   458  			} else {
   459  				assert.NotNil(t, err, "this combination should not be possible")
   460  			}
   461  		})
   462  	}
   463  }
   464  
   465  func TestPickDefaultModelVersion(t *testing.T) {
   466  	t.Run("ada with text", func(t *testing.T) {
   467  		version := PickDefaultModelVersion("ada", "text")
   468  		assert.Equal(t, "002", version)
   469  	})
   470  
   471  	t.Run("ada with code", func(t *testing.T) {
   472  		version := PickDefaultModelVersion("ada", "code")
   473  		assert.Equal(t, "001", version)
   474  	})
   475  
   476  	t.Run("with curie", func(t *testing.T) {
   477  		version := PickDefaultModelVersion("curie", "text")
   478  		assert.Equal(t, "001", version)
   479  	})
   480  }