github.com/weaviate/weaviate@v1.24.6/modules/text2vec-openai/vectorizer/texts_test.go

github.com/weaviate/weaviate@v1.24.6/modules/text2vec-openai/vectorizer/texts_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"testing"
    17  
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/stretchr/testify/require"
    20  )
    21  
    22  // as used in the nearText searcher
    23  func TestVectorizingTexts(t *testing.T) {
    24  	type testCase struct {
    25  		name                 string
    26  		input                []string
    27  		expectedOpenAIType   string
    28  		openAIType           string
    29  		expectedOpenAIModel  string
    30  		openAIModel          string
    31  		modelVersion         string
    32  		expectedModelVersion string
    33  	}
    34  
    35  	tests := []testCase{
    36  		{
    37  			name:                "single word",
    38  			input:               []string{"hello"},
    39  			openAIType:          "text",
    40  			expectedOpenAIType:  "text",
    41  			openAIModel:         "ada",
    42  			expectedOpenAIModel: "ada",
    43  
    44  			// use something that doesn't exist on purpose to rule out that this was
    45  			// set by a default, but validate that the version was set explicitly
    46  			// due to https://github.com/weaviate/weaviate/issues/2458
    47  			modelVersion:         "003",
    48  			expectedModelVersion: "003",
    49  		},
    50  		{
    51  			name:                "multiple words",
    52  			input:               []string{"hello world, this is me!"},
    53  			openAIType:          "text",
    54  			expectedOpenAIType:  "text",
    55  			openAIModel:         "ada",
    56  			expectedOpenAIModel: "ada",
    57  
    58  			// use something that doesn't exist on purpose to rule out that this was
    59  			// set by a default, but validate that the version was set explicitly
    60  			// due to https://github.com/weaviate/weaviate/issues/2458
    61  			modelVersion:         "003",
    62  			expectedModelVersion: "003",
    63  		},
    64  		{
    65  			name:                "multiple sentences (joined with a dot)",
    66  			input:               []string{"this is sentence 1", "and here's number 2"},
    67  			openAIType:          "text",
    68  			expectedOpenAIType:  "text",
    69  			openAIModel:         "ada",
    70  			expectedOpenAIModel: "ada",
    71  
    72  			// use something that doesn't exist on purpose to rule out that this was
    73  			// set by a default, but validate that the version was set explicitly
    74  			// due to https://github.com/weaviate/weaviate/issues/2458
    75  			modelVersion:         "003",
    76  			expectedModelVersion: "003",
    77  		},
    78  		{
    79  			name:                "multiple sentences already containing a dot",
    80  			input:               []string{"this is sentence 1.", "and here's number 2"},
    81  			openAIType:          "text",
    82  			expectedOpenAIType:  "text",
    83  			openAIModel:         "ada",
    84  			expectedOpenAIModel: "ada",
    85  
    86  			// use something that doesn't exist on purpose to rule out that this was
    87  			// set by a default, but validate that the version was set explicitly
    88  			// due to https://github.com/weaviate/weaviate/issues/2458
    89  			modelVersion:         "003",
    90  			expectedModelVersion: "003",
    91  		},
    92  		{
    93  			name:                "multiple sentences already containing a question mark",
    94  			input:               []string{"this is sentence 1?", "and here's number 2"},
    95  			openAIType:          "text",
    96  			expectedOpenAIType:  "text",
    97  			openAIModel:         "ada",
    98  			expectedOpenAIModel: "ada",
    99  
   100  			// use something that doesn't exist on purpose to rule out that this was
   101  			// set by a default, but validate that the version was set explicitly
   102  			// due to https://github.com/weaviate/weaviate/issues/2458
   103  			modelVersion:         "003",
   104  			expectedModelVersion: "003",
   105  		},
   106  		{
   107  			name:                "multiple sentences already containing an exclamation mark",
   108  			input:               []string{"this is sentence 1!", "and here's number 2"},
   109  			openAIType:          "text",
   110  			expectedOpenAIType:  "text",
   111  			openAIModel:         "ada",
   112  			expectedOpenAIModel: "ada",
   113  
   114  			// use something that doesn't exist on purpose to rule out that this was
   115  			// set by a default, but validate that the version was set explicitly
   116  			// due to https://github.com/weaviate/weaviate/issues/2458
   117  			modelVersion:         "003",
   118  			expectedModelVersion: "003",
   119  		},
   120  		{
   121  			name:                "multiple sentences already containing comma",
   122  			input:               []string{"this is sentence 1,", "and here's number 2"},
   123  			openAIType:          "text",
   124  			expectedOpenAIType:  "text",
   125  			openAIModel:         "ada",
   126  			expectedOpenAIModel: "ada",
   127  
   128  			// use something that doesn't exist on purpose to rule out that this was
   129  			// set by a default, but validate that the version was set explicitly
   130  			// due to https://github.com/weaviate/weaviate/issues/2458
   131  			modelVersion:         "003",
   132  			expectedModelVersion: "003",
   133  		},
   134  	}
   135  
   136  	for _, test := range tests {
   137  		t.Run(test.name, func(t *testing.T) {
   138  			client := &fakeClient{}
   139  
   140  			v := New(client)
   141  
   142  			cfg := &fakeClassConfig{
   143  				classConfig: map[string]interface{}{
   144  					"type":         test.openAIType,
   145  					"model":        test.openAIModel,
   146  					"modelVersion": test.modelVersion,
   147  				},
   148  			}
   149  			vec, err := v.Texts(context.Background(), test.input, cfg)
   150  
   151  			require.Nil(t, err)
   152  			assert.Equal(t, []float32{0.1, 1.1, 2.1, 3.1}, vec)
   153  			assert.Equal(t, test.input, client.lastInput)
   154  			assert.Equal(t, client.lastConfig.Type, test.expectedOpenAIType)
   155  			assert.Equal(t, client.lastConfig.Model, test.expectedOpenAIModel)
   156  			assert.Equal(t, client.lastConfig.ModelVersion, test.expectedModelVersion)
   157  		})
   158  	}
   159  }