github.com/weaviate/weaviate@v1.24.6/modules/text2vec-transformers/module.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package modtransformers 13 14 import ( 15 "context" 16 "net/http" 17 "os" 18 "time" 19 20 "github.com/pkg/errors" 21 "github.com/sirupsen/logrus" 22 "github.com/weaviate/weaviate/entities/models" 23 "github.com/weaviate/weaviate/entities/modulecapabilities" 24 "github.com/weaviate/weaviate/entities/moduletools" 25 "github.com/weaviate/weaviate/modules/text2vec-transformers/clients" 26 "github.com/weaviate/weaviate/modules/text2vec-transformers/vectorizer" 27 "github.com/weaviate/weaviate/usecases/modulecomponents/additional" 28 ) 29 30 func New() *TransformersModule { 31 return &TransformersModule{} 32 } 33 34 type TransformersModule struct { 35 vectorizer textVectorizer 36 metaProvider metaProvider 37 graphqlProvider modulecapabilities.GraphQLArguments 38 searcher modulecapabilities.Searcher 39 nearTextTransformer modulecapabilities.TextTransform 40 logger logrus.FieldLogger 41 additionalPropertiesProvider modulecapabilities.AdditionalProperties 42 } 43 44 type textVectorizer interface { 45 Object(ctx context.Context, obj *models.Object, comp moduletools.VectorizablePropsComparator, 46 cfg moduletools.ClassConfig) ([]float32, models.AdditionalProperties, error) 47 Texts(ctx context.Context, input []string, 48 cfg moduletools.ClassConfig) ([]float32, error) 49 } 50 51 type metaProvider interface { 52 MetaInfo() (map[string]interface{}, error) 53 } 54 55 func (m *TransformersModule) Name() string { 56 return "text2vec-transformers" 57 } 58 59 func (m *TransformersModule) Type() modulecapabilities.ModuleType { 60 return modulecapabilities.Text2Vec 61 } 62 63 func (m *TransformersModule) Init(ctx context.Context, 64 params moduletools.ModuleInitParams, 65 ) error { 66 m.logger = params.GetLogger() 67 68 if err := m.initVectorizer(ctx, params.GetConfig().ModuleHttpClientTimeout, m.logger); err != nil { 69 return errors.Wrap(err, "init vectorizer") 70 } 71 72 if err := m.initAdditionalPropertiesProvider(); err != nil { 73 return errors.Wrap(err, "init additional properties provider") 74 } 75 76 return nil 77 } 78 79 func (m *TransformersModule) InitExtension(modules []modulecapabilities.Module) error { 80 for _, module := range modules { 81 if module.Name() == m.Name() { 82 continue 83 } 84 if arg, ok := module.(modulecapabilities.TextTransformers); ok { 85 if arg != nil && arg.TextTransformers() != nil { 86 m.nearTextTransformer = arg.TextTransformers()["nearText"] 87 } 88 } 89 } 90 91 if err := m.initNearText(); err != nil { 92 return errors.Wrap(err, "init graphql provider") 93 } 94 return nil 95 } 96 97 func (m *TransformersModule) initVectorizer(ctx context.Context, timeout time.Duration, 98 logger logrus.FieldLogger, 99 ) error { 100 // TODO: gh-1486 proper config management 101 uriPassage := os.Getenv("TRANSFORMERS_PASSAGE_INFERENCE_API") 102 uriQuery := os.Getenv("TRANSFORMERS_QUERY_INFERENCE_API") 103 uriCommon := os.Getenv("TRANSFORMERS_INFERENCE_API") 104 105 if uriCommon == "" { 106 if uriPassage == "" && uriQuery == "" { 107 return errors.Errorf("required variable TRANSFORMERS_INFERENCE_API or both variables TRANSFORMERS_PASSAGE_INFERENCE_API and TRANSFORMERS_QUERY_INFERENCE_API are not set") 108 } 109 if uriPassage != "" && uriQuery == "" { 110 return errors.Errorf("required variable TRANSFORMERS_QUERY_INFERENCE_API is not set") 111 } 112 if uriPassage == "" && uriQuery != "" { 113 return errors.Errorf("required variable TRANSFORMERS_PASSAGE_INFERENCE_API is not set") 114 } 115 } else { 116 if uriPassage != "" || uriQuery != "" { 117 return errors.Errorf("either variable TRANSFORMERS_INFERENCE_API or both variables TRANSFORMERS_PASSAGE_INFERENCE_API and TRANSFORMERS_QUERY_INFERENCE_API should be set") 118 } 119 uriPassage = uriCommon 120 uriQuery = uriCommon 121 } 122 123 client := clients.New(uriPassage, uriQuery, timeout, logger) 124 if err := client.WaitForStartup(ctx, 1*time.Second); err != nil { 125 return errors.Wrap(err, "init remote vectorizer") 126 } 127 128 m.vectorizer = vectorizer.New(client) 129 m.metaProvider = client 130 131 return nil 132 } 133 134 func (m *TransformersModule) initAdditionalPropertiesProvider() error { 135 m.additionalPropertiesProvider = additional.NewText2VecProvider() 136 return nil 137 } 138 139 func (m *TransformersModule) RootHandler() http.Handler { 140 // TODO: remove once this is a capability interface 141 return nil 142 } 143 144 func (m *TransformersModule) VectorizeObject(ctx context.Context, 145 obj *models.Object, comp moduletools.VectorizablePropsComparator, cfg moduletools.ClassConfig, 146 ) ([]float32, models.AdditionalProperties, error) { 147 return m.vectorizer.Object(ctx, obj, comp, cfg) 148 } 149 150 func (m *TransformersModule) MetaInfo() (map[string]interface{}, error) { 151 return m.metaProvider.MetaInfo() 152 } 153 154 func (m *TransformersModule) AdditionalProperties() map[string]modulecapabilities.AdditionalProperty { 155 return m.additionalPropertiesProvider.AdditionalProperties() 156 } 157 158 func (m *TransformersModule) VectorizeInput(ctx context.Context, 159 input string, cfg moduletools.ClassConfig, 160 ) ([]float32, error) { 161 return m.vectorizer.Texts(ctx, []string{input}, cfg) 162 } 163 164 // verify we implement the modules.Module interface 165 var ( 166 _ = modulecapabilities.Module(New()) 167 _ = modulecapabilities.Vectorizer(New()) 168 _ = modulecapabilities.MetaProvider(New()) 169 )