github.com/weaviate/weaviate@v1.24.6/modules/text2vec-transformers/config_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package modtransformers
    13  
    14  import (
    15  	"context"
    16  	"testing"
    17  
    18  	"github.com/sirupsen/logrus"
    19  	ltest "github.com/sirupsen/logrus/hooks/test"
    20  	"github.com/stretchr/testify/assert"
    21  	"github.com/stretchr/testify/require"
    22  	"github.com/weaviate/weaviate/entities/models"
    23  	"github.com/weaviate/weaviate/entities/schema"
    24  )
    25  
    26  func TestConfigDefaults(t *testing.T) {
    27  	t.Run("for properties", func(t *testing.T) {
    28  		def := New().ClassConfigDefaults()
    29  
    30  		assert.Equal(t, true, def["vectorizeClassName"])
    31  		assert.Equal(t, "masked_mean", def["poolingStrategy"])
    32  	})
    33  
    34  	t.Run("for the class", func(t *testing.T) {
    35  		dt := schema.DataTypeText
    36  		def := New().PropertyConfigDefaults(&dt)
    37  		assert.Equal(t, false, def["vectorizePropertyName"])
    38  		assert.Equal(t, false, def["skip"])
    39  	})
    40  }
    41  
    42  func TestConfigValidator(t *testing.T) {
    43  	t.Run("all usable props no-indexed", func(t *testing.T) {
    44  		t.Run("all schema vectorization turned off", func(t *testing.T) {
    45  			class := &models.Class{
    46  				Vectorizer: "text2vec-contextionary",
    47  				Class:      "ValidName",
    48  				Properties: []*models.Property{
    49  					{
    50  						DataType: []string{"text"},
    51  						Name:     "description",
    52  					},
    53  					{
    54  						DataType:     schema.DataTypeText.PropString(),
    55  						Tokenization: models.PropertyTokenizationWhitespace,
    56  						Name:         "name",
    57  					},
    58  					{
    59  						DataType: []string{"int"},
    60  						Name:     "amount",
    61  					},
    62  				},
    63  			}
    64  
    65  			logger, _ := ltest.NewNullLogger()
    66  			v := NewConfigValidator(logger)
    67  			err := v.Do(context.Background(), class, nil, &fakeIndexChecker{
    68  				vectorizePropertyName: false,
    69  				vectorizeClassName:    false,
    70  				propertyIndexed:       false,
    71  			})
    72  			assert.NotNil(t, err)
    73  		})
    74  	})
    75  }
    76  
    77  func TestConfigValidator_RiskOfDuplicateVectors(t *testing.T) {
    78  	type test struct {
    79  		name          string
    80  		in            *models.Class
    81  		expectWarning bool
    82  		indexChecker  *fakeIndexChecker
    83  	}
    84  
    85  	tests := []test{
    86  		{
    87  			name: "usable properties",
    88  			in: &models.Class{
    89  				Class: "ValidName",
    90  				Properties: []*models.Property{
    91  					{
    92  						DataType: []string{string(schema.DataTypeText)},
    93  						Name:     "textProp",
    94  					},
    95  				},
    96  			},
    97  			expectWarning: false,
    98  			indexChecker: &fakeIndexChecker{
    99  				vectorizePropertyName: false,
   100  				vectorizeClassName:    true,
   101  				propertyIndexed:       true,
   102  			},
   103  		},
   104  		{
   105  			name: "no properties",
   106  			in: &models.Class{
   107  				Class: "ValidName",
   108  			},
   109  			expectWarning: true,
   110  			indexChecker: &fakeIndexChecker{
   111  				vectorizePropertyName: false,
   112  				vectorizeClassName:    true,
   113  				propertyIndexed:       false,
   114  			},
   115  		},
   116  		{
   117  			name: "usable properties, but they are no-indexed",
   118  			in: &models.Class{
   119  				Class: "ValidName",
   120  				Properties: []*models.Property{
   121  					{
   122  						DataType: []string{string(schema.DataTypeText)},
   123  						Name:     "textProp",
   124  					},
   125  				},
   126  			},
   127  			expectWarning: true,
   128  			indexChecker: &fakeIndexChecker{
   129  				vectorizePropertyName: false,
   130  				vectorizeClassName:    true,
   131  				propertyIndexed:       false,
   132  			},
   133  		},
   134  		{
   135  			name: "only unusable properties",
   136  			in: &models.Class{
   137  				Class: "ValidName",
   138  				Properties: []*models.Property{
   139  					{
   140  						DataType: []string{string(schema.DataTypeInt)},
   141  						Name:     "intProp",
   142  					},
   143  				},
   144  			},
   145  			expectWarning: true,
   146  			indexChecker: &fakeIndexChecker{
   147  				vectorizePropertyName: false,
   148  				vectorizeClassName:    true,
   149  				propertyIndexed:       false,
   150  			},
   151  		},
   152  	}
   153  
   154  	for _, test := range tests {
   155  		t.Run(test.name, func(t *testing.T) {
   156  			logger, hook := ltest.NewNullLogger()
   157  			v := NewConfigValidator(logger)
   158  			err := v.Do(context.Background(), test.in, nil, test.indexChecker)
   159  			require.Nil(t, err)
   160  
   161  			entry := hook.LastEntry()
   162  			if test.expectWarning {
   163  				require.NotNil(t, entry)
   164  				assert.Equal(t, logrus.WarnLevel, entry.Level)
   165  			} else {
   166  				assert.Nil(t, entry)
   167  			}
   168  		})
   169  	}
   170  }
   171  
   172  type fakeIndexChecker struct {
   173  	vectorizeClassName    bool
   174  	vectorizePropertyName bool
   175  	propertyIndexed       bool
   176  }
   177  
   178  func (f *fakeIndexChecker) VectorizeClassName() bool {
   179  	return f.vectorizeClassName
   180  }
   181  
   182  func (f *fakeIndexChecker) VectorizePropertyName(propName string) bool {
   183  	return f.vectorizePropertyName
   184  }
   185  
   186  func (f *fakeIndexChecker) PropertyIndexed(propName string) bool {
   187  	return f.propertyIndexed
   188  }