github.com/weaviate/weaviate@v1.24.6/modules/text2vec-transformers/vectorizer/texts_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"testing"
    17  
    18  	"github.com/stretchr/testify/assert"
    19  	"github.com/stretchr/testify/require"
    20  )
    21  
    22  // as used in the nearText searcher
    23  func TestVectorizingTexts(t *testing.T) {
    24  	type testCase struct {
    25  		name                    string
    26  		input                   []string
    27  		expectedPoolingStrategy string
    28  		poolingStrategy         string
    29  	}
    30  
    31  	tests := []testCase{
    32  		{
    33  			name:                    "single word",
    34  			input:                   []string{"hello"},
    35  			poolingStrategy:         "cls",
    36  			expectedPoolingStrategy: "cls",
    37  		},
    38  		{
    39  			name:                    "multiple words",
    40  			input:                   []string{"hello world, this is me!"},
    41  			poolingStrategy:         "cls",
    42  			expectedPoolingStrategy: "cls",
    43  		},
    44  
    45  		{
    46  			name:                    "multiple sentences (joined with a dot)",
    47  			input:                   []string{"this is sentence 1", "and here's number 2"},
    48  			poolingStrategy:         "cls",
    49  			expectedPoolingStrategy: "cls",
    50  		},
    51  
    52  		{
    53  			name:                    "multiple sentences already containing a dot",
    54  			input:                   []string{"this is sentence 1.", "and here's number 2"},
    55  			poolingStrategy:         "cls",
    56  			expectedPoolingStrategy: "cls",
    57  		},
    58  		{
    59  			name:                    "multiple sentences already containing a question mark",
    60  			input:                   []string{"this is sentence 1?", "and here's number 2"},
    61  			poolingStrategy:         "cls",
    62  			expectedPoolingStrategy: "cls",
    63  		},
    64  		{
    65  			name:                    "multiple sentences already containing an exclamation mark",
    66  			input:                   []string{"this is sentence 1!", "and here's number 2"},
    67  			poolingStrategy:         "cls",
    68  			expectedPoolingStrategy: "cls",
    69  		},
    70  		{
    71  			name:                    "multiple sentences already containing comma",
    72  			input:                   []string{"this is sentence 1,", "and here's number 2"},
    73  			poolingStrategy:         "cls",
    74  			expectedPoolingStrategy: "cls",
    75  		},
    76  	}
    77  
    78  	for _, test := range tests {
    79  		t.Run(test.name, func(t *testing.T) {
    80  			client := &fakeClient{}
    81  
    82  			v := New(client)
    83  
    84  			settings := &fakeClassConfig{
    85  				poolingStrategy: test.poolingStrategy,
    86  			}
    87  			vec, err := v.Texts(context.Background(), test.input, settings)
    88  
    89  			require.Nil(t, err)
    90  			assert.Equal(t, []float32{0, 1, 2, 3}, vec)
    91  			assert.Equal(t, client.lastConfig.PoolingStrategy, test.expectedPoolingStrategy)
    92  		})
    93  	}
    94  }