github.com/weaviate/weaviate@v1.24.6/modules/text2vec-jinaai/vectorizer/texts_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "testing" 17 18 "github.com/stretchr/testify/assert" 19 "github.com/stretchr/testify/require" 20 ) 21 22 // as used in the nearText searcher 23 func TestVectorizingTexts(t *testing.T) { 24 type testCase struct { 25 name string 26 input []string 27 expectedJinaAIModel string 28 jinaAIModel string 29 } 30 31 tests := []testCase{ 32 { 33 name: "single word", 34 input: []string{"hello"}, 35 jinaAIModel: "jina-embedding-v2", 36 expectedJinaAIModel: "jina-embedding-v2", 37 }, 38 { 39 name: "multiple words", 40 input: []string{"hello world, this is me!"}, 41 jinaAIModel: "jina-embedding-v2", 42 expectedJinaAIModel: "jina-embedding-v2", 43 }, 44 { 45 name: "multiple sentences (joined with a dot)", 46 input: []string{"this is sentence 1", "and here's number 2"}, 47 jinaAIModel: "jina-embedding-v2", 48 expectedJinaAIModel: "jina-embedding-v2", 49 }, 50 { 51 name: "multiple sentences already containing a dot", 52 input: []string{"this is sentence 1.", "and here's number 2"}, 53 jinaAIModel: "jina-embedding-v2", 54 expectedJinaAIModel: "jina-embedding-v2", 55 }, 56 { 57 name: "multiple sentences already containing a question mark", 58 input: []string{"this is sentence 1?", "and here's number 2"}, 59 jinaAIModel: "jina-embedding-v2", 60 expectedJinaAIModel: "jina-embedding-v2", 61 }, 62 { 63 name: "multiple sentences already containing an exclamation mark", 64 input: []string{"this is sentence 1!", "and here's number 2"}, 65 jinaAIModel: "jina-embedding-v2", 66 expectedJinaAIModel: "jina-embedding-v2", 67 }, 68 { 69 name: "multiple sentences already containing comma", 70 input: []string{"this is sentence 1,", "and here's number 2"}, 71 jinaAIModel: "jina-embedding-v2", 72 expectedJinaAIModel: "jina-embedding-v2", 73 }, 74 } 75 76 for _, test := range tests { 77 t.Run(test.name, func(t *testing.T) { 78 client := &fakeClient{} 79 80 v := New(client) 81 82 settings := &fakeClassConfig{ 83 jinaAIModel: test.jinaAIModel, 84 } 85 vec, err := v.Texts(context.Background(), test.input, settings) 86 87 require.Nil(t, err) 88 assert.Equal(t, []float32{0.1, 1.1, 2.1, 3.1}, vec) 89 assert.Equal(t, test.input, client.lastInput) 90 assert.Equal(t, client.lastConfig.Model, test.expectedJinaAIModel) 91 }) 92 } 93 }