github.com/weaviate/weaviate@v1.24.6/usecases/modules/vectorizer_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package modules
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"testing"
    18  
    19  	"github.com/go-openapi/strfmt"
    20  	"github.com/google/uuid"
    21  	"github.com/sirupsen/logrus/hooks/test"
    22  	"github.com/stretchr/testify/assert"
    23  	"github.com/weaviate/weaviate/entities/models"
    24  	"github.com/weaviate/weaviate/entities/modulecapabilities"
    25  	"github.com/weaviate/weaviate/entities/moduletools"
    26  	"github.com/weaviate/weaviate/entities/schema"
    27  	"github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    28  )
    29  
    30  func TestProvider_ValidateVectorizer(t *testing.T) {
    31  	t.Run("with vectorizer module", func(t *testing.T) {
    32  		p := NewProvider()
    33  		vec := newDummyModule("some-module", modulecapabilities.Text2Vec)
    34  		p.Register(vec)
    35  
    36  		err := p.ValidateVectorizer(vec.Name())
    37  		assert.Nil(t, err)
    38  	})
    39  
    40  	t.Run("with reference vectorizer module", func(t *testing.T) {
    41  		p := NewProvider()
    42  		refVec := newDummyModule("some-module", modulecapabilities.Ref2Vec)
    43  		p.Register(refVec)
    44  
    45  		err := p.ValidateVectorizer(refVec.Name())
    46  		assert.Nil(t, err)
    47  	})
    48  
    49  	t.Run("with non-vectorizer module", func(t *testing.T) {
    50  		modName := "some-module"
    51  		p := NewProvider()
    52  		nonVec := newDummyModule(modName, "")
    53  		p.Register(nonVec)
    54  
    55  		expectedErr := fmt.Sprintf(
    56  			"module %q exists, but does not provide the Vectorizer or ReferenceVectorizer capability",
    57  			modName)
    58  		err := p.ValidateVectorizer(nonVec.Name())
    59  		assert.EqualError(t, err, expectedErr)
    60  	})
    61  
    62  	t.Run("with unregistered module", func(t *testing.T) {
    63  		modName := "does-not-exist"
    64  		p := NewProvider()
    65  		expectedErr := fmt.Sprintf(
    66  			"no module with name %q present",
    67  			modName)
    68  		err := p.ValidateVectorizer(modName)
    69  		assert.EqualError(t, err, expectedErr)
    70  	})
    71  }
    72  
    73  func TestProvider_UsingRef2Vec(t *testing.T) {
    74  	t.Run("with ReferenceVectorizer", func(t *testing.T) {
    75  		modName := "some-module"
    76  		className := "SomeClass"
    77  		mod := newDummyModule(modName, modulecapabilities.Ref2Vec)
    78  		sch := schema.Schema{Objects: &models.Schema{
    79  			Classes: []*models.Class{{
    80  				Class: className,
    81  				ModuleConfig: map[string]interface{}{
    82  					modName: struct{}{},
    83  				},
    84  			}},
    85  		}}
    86  		p := NewProvider()
    87  		p.SetSchemaGetter(&fakeSchemaGetter{sch})
    88  		p.Register(mod)
    89  		assert.True(t, p.UsingRef2Vec(className))
    90  	})
    91  
    92  	t.Run("with Vectorizer", func(t *testing.T) {
    93  		modName := "some-module"
    94  		className := "SomeClass"
    95  		mod := newDummyModule(modName, modulecapabilities.Text2Vec)
    96  		sch := schema.Schema{Objects: &models.Schema{
    97  			Classes: []*models.Class{{
    98  				Class: className,
    99  				ModuleConfig: map[string]interface{}{
   100  					modName: struct{}{},
   101  				},
   102  			}},
   103  		}}
   104  		p := NewProvider()
   105  		p.SetSchemaGetter(&fakeSchemaGetter{sch})
   106  		p.Register(mod)
   107  		assert.False(t, p.UsingRef2Vec(className))
   108  	})
   109  
   110  	t.Run("with nonexistent class", func(t *testing.T) {
   111  		className := "SomeClass"
   112  		mod := newDummyModule("", "")
   113  
   114  		p := NewProvider()
   115  		p.SetSchemaGetter(&fakeSchemaGetter{schema.Schema{}})
   116  		p.Register(mod)
   117  		assert.False(t, p.UsingRef2Vec(className))
   118  	})
   119  
   120  	t.Run("with empty class module config", func(t *testing.T) {
   121  		modName := "some-module"
   122  		className := "SomeClass"
   123  		mod := newDummyModule(modName, modulecapabilities.Text2Vec)
   124  		sch := schema.Schema{Objects: &models.Schema{
   125  			Classes: []*models.Class{{
   126  				Class: className,
   127  			}},
   128  		}}
   129  		p := NewProvider()
   130  		p.SetSchemaGetter(&fakeSchemaGetter{sch})
   131  		p.Register(mod)
   132  		assert.False(t, p.UsingRef2Vec(className))
   133  	})
   134  
   135  	t.Run("with unregistered module", func(t *testing.T) {
   136  		modName := "some-module"
   137  		className := "SomeClass"
   138  		sch := schema.Schema{Objects: &models.Schema{
   139  			Classes: []*models.Class{{
   140  				Class: className,
   141  				ModuleConfig: map[string]interface{}{
   142  					modName: struct{}{},
   143  				},
   144  			}},
   145  		}}
   146  		p := NewProvider()
   147  		p.SetSchemaGetter(&fakeSchemaGetter{sch})
   148  		assert.False(t, p.UsingRef2Vec(className))
   149  	})
   150  }
   151  
   152  func TestProvider_UpdateVector(t *testing.T) {
   153  	t.Run("with Vectorizer", func(t *testing.T) {
   154  		ctx := context.Background()
   155  		modName := "some-vzr"
   156  		className := "SomeClass"
   157  		mod := newDummyModule(modName, modulecapabilities.Text2Vec)
   158  		class := models.Class{
   159  			Class: className,
   160  			ModuleConfig: map[string]interface{}{
   161  				modName: struct{}{},
   162  			},
   163  			VectorIndexConfig: hnsw.UserConfig{},
   164  		}
   165  		sch := schema.Schema{
   166  			Objects: &models.Schema{
   167  				Classes: []*models.Class{&class},
   168  			},
   169  		}
   170  		repo := &fakeObjectsRepo{}
   171  		logger, _ := test.NewNullLogger()
   172  
   173  		p := NewProvider()
   174  		p.Register(mod)
   175  		p.SetSchemaGetter(&fakeSchemaGetter{sch})
   176  
   177  		obj := &models.Object{Class: className, ID: newUUID()}
   178  		err := p.UpdateVector(ctx, obj, &class, compFactoryFn(obj, &class), repo.Object, logger)
   179  		assert.Nil(t, err)
   180  	})
   181  
   182  	t.Run("with ReferenceVectorizer", func(t *testing.T) {
   183  		ctx := context.Background()
   184  		modName := "some-vzr"
   185  		className := "SomeClass"
   186  		mod := newDummyModule(modName, modulecapabilities.Ref2Vec)
   187  		class := &models.Class{
   188  			Class: className,
   189  			ModuleConfig: map[string]interface{}{
   190  				modName: struct{}{},
   191  			},
   192  			VectorIndexConfig: hnsw.UserConfig{},
   193  		}
   194  
   195  		sch := schema.Schema{Objects: &models.Schema{
   196  			Classes: []*models.Class{class},
   197  		}}
   198  		repo := &fakeObjectsRepo{}
   199  		logger, _ := test.NewNullLogger()
   200  
   201  		p := NewProvider()
   202  		p.Register(mod)
   203  		p.SetSchemaGetter(&fakeSchemaGetter{sch})
   204  
   205  		obj := &models.Object{Class: className, ID: newUUID()}
   206  		err := p.UpdateVector(ctx, obj, class, compFactoryFn(obj, class), repo.Object, logger)
   207  		assert.Nil(t, err)
   208  	})
   209  
   210  	t.Run("with nonexistent class", func(t *testing.T) {
   211  		ctx := context.Background()
   212  		class := &models.Class{
   213  			Class:             "SomeClass",
   214  			VectorIndexConfig: hnsw.UserConfig{},
   215  		}
   216  		mod := newDummyModule("", "")
   217  		repo := &fakeObjectsRepo{}
   218  		logger, _ := test.NewNullLogger()
   219  
   220  		p := NewProvider()
   221  		p.Register(mod)
   222  		p.SetSchemaGetter(&fakeSchemaGetter{schema.Schema{}})
   223  
   224  		obj := &models.Object{Class: "Other Class", ID: newUUID()}
   225  		err := p.UpdateVector(ctx, obj, class, compFactoryFn(obj, class), repo.Object, logger)
   226  		expectedErr := fmt.Sprintf("class %v not present", obj.Class)
   227  		assert.EqualError(t, err, expectedErr)
   228  	})
   229  
   230  	t.Run("with nonexistent vector index config type", func(t *testing.T) {
   231  		ctx := context.Background()
   232  		modName := "some-vzr"
   233  		className := "SomeClass"
   234  		mod := newDummyModule(modName, modulecapabilities.Ref2Vec)
   235  		class := &models.Class{
   236  			Class: className,
   237  			ModuleConfig: map[string]interface{}{
   238  				modName: struct{}{},
   239  			},
   240  			VectorIndexConfig: struct{}{},
   241  		}
   242  		sch := schema.Schema{Objects: &models.Schema{
   243  			Classes: []*models.Class{class},
   244  		}}
   245  		repo := &fakeObjectsRepo{}
   246  		logger, _ := test.NewNullLogger()
   247  
   248  		p := NewProvider()
   249  		p.Register(mod)
   250  		p.SetSchemaGetter(&fakeSchemaGetter{sch})
   251  
   252  		obj := &models.Object{Class: className, ID: newUUID()}
   253  		err := p.UpdateVector(ctx, obj, class, compFactoryFn(obj, class), repo.Object, logger)
   254  		expectedErr := "vector index config (struct {}) is not of type HNSW, " +
   255  			"but objects manager is restricted to HNSW"
   256  		assert.EqualError(t, err, expectedErr)
   257  	})
   258  }
   259  
   260  func newUUID() strfmt.UUID {
   261  	return strfmt.UUID(uuid.NewString())
   262  }
   263  
   264  func compFactoryFn(object *models.Object, class *models.Class) moduletools.PropsComparatorFactory {
   265  	return func() (moduletools.VectorizablePropsComparator, error) {
   266  		return moduletools.NewVectorizablePropsComparatorDummy(class.Properties, object.Properties), nil
   267  	}
   268  }