github.com/weaviate/weaviate@v1.24.6/modules/text2vec-transformers/module.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package modtransformers
    13  
    14  import (
    15  	"context"
    16  	"net/http"
    17  	"os"
    18  	"time"
    19  
    20  	"github.com/pkg/errors"
    21  	"github.com/sirupsen/logrus"
    22  	"github.com/weaviate/weaviate/entities/models"
    23  	"github.com/weaviate/weaviate/entities/modulecapabilities"
    24  	"github.com/weaviate/weaviate/entities/moduletools"
    25  	"github.com/weaviate/weaviate/modules/text2vec-transformers/clients"
    26  	"github.com/weaviate/weaviate/modules/text2vec-transformers/vectorizer"
    27  	"github.com/weaviate/weaviate/usecases/modulecomponents/additional"
    28  )
    29  
    30  func New() *TransformersModule {
    31  	return &TransformersModule{}
    32  }
    33  
    34  type TransformersModule struct {
    35  	vectorizer                   textVectorizer
    36  	metaProvider                 metaProvider
    37  	graphqlProvider              modulecapabilities.GraphQLArguments
    38  	searcher                     modulecapabilities.Searcher
    39  	nearTextTransformer          modulecapabilities.TextTransform
    40  	logger                       logrus.FieldLogger
    41  	additionalPropertiesProvider modulecapabilities.AdditionalProperties
    42  }
    43  
    44  type textVectorizer interface {
    45  	Object(ctx context.Context, obj *models.Object, comp moduletools.VectorizablePropsComparator,
    46  		cfg moduletools.ClassConfig) ([]float32, models.AdditionalProperties, error)
    47  	Texts(ctx context.Context, input []string,
    48  		cfg moduletools.ClassConfig) ([]float32, error)
    49  }
    50  
    51  type metaProvider interface {
    52  	MetaInfo() (map[string]interface{}, error)
    53  }
    54  
    55  func (m *TransformersModule) Name() string {
    56  	return "text2vec-transformers"
    57  }
    58  
    59  func (m *TransformersModule) Type() modulecapabilities.ModuleType {
    60  	return modulecapabilities.Text2Vec
    61  }
    62  
    63  func (m *TransformersModule) Init(ctx context.Context,
    64  	params moduletools.ModuleInitParams,
    65  ) error {
    66  	m.logger = params.GetLogger()
    67  
    68  	if err := m.initVectorizer(ctx, params.GetConfig().ModuleHttpClientTimeout, m.logger); err != nil {
    69  		return errors.Wrap(err, "init vectorizer")
    70  	}
    71  
    72  	if err := m.initAdditionalPropertiesProvider(); err != nil {
    73  		return errors.Wrap(err, "init additional properties provider")
    74  	}
    75  
    76  	return nil
    77  }
    78  
    79  func (m *TransformersModule) InitExtension(modules []modulecapabilities.Module) error {
    80  	for _, module := range modules {
    81  		if module.Name() == m.Name() {
    82  			continue
    83  		}
    84  		if arg, ok := module.(modulecapabilities.TextTransformers); ok {
    85  			if arg != nil && arg.TextTransformers() != nil {
    86  				m.nearTextTransformer = arg.TextTransformers()["nearText"]
    87  			}
    88  		}
    89  	}
    90  
    91  	if err := m.initNearText(); err != nil {
    92  		return errors.Wrap(err, "init graphql provider")
    93  	}
    94  	return nil
    95  }
    96  
    97  func (m *TransformersModule) initVectorizer(ctx context.Context, timeout time.Duration,
    98  	logger logrus.FieldLogger,
    99  ) error {
   100  	// TODO: gh-1486 proper config management
   101  	uriPassage := os.Getenv("TRANSFORMERS_PASSAGE_INFERENCE_API")
   102  	uriQuery := os.Getenv("TRANSFORMERS_QUERY_INFERENCE_API")
   103  	uriCommon := os.Getenv("TRANSFORMERS_INFERENCE_API")
   104  
   105  	if uriCommon == "" {
   106  		if uriPassage == "" && uriQuery == "" {
   107  			return errors.Errorf("required variable TRANSFORMERS_INFERENCE_API or both variables TRANSFORMERS_PASSAGE_INFERENCE_API and TRANSFORMERS_QUERY_INFERENCE_API are not set")
   108  		}
   109  		if uriPassage != "" && uriQuery == "" {
   110  			return errors.Errorf("required variable TRANSFORMERS_QUERY_INFERENCE_API is not set")
   111  		}
   112  		if uriPassage == "" && uriQuery != "" {
   113  			return errors.Errorf("required variable TRANSFORMERS_PASSAGE_INFERENCE_API is not set")
   114  		}
   115  	} else {
   116  		if uriPassage != "" || uriQuery != "" {
   117  			return errors.Errorf("either variable TRANSFORMERS_INFERENCE_API or both variables TRANSFORMERS_PASSAGE_INFERENCE_API and TRANSFORMERS_QUERY_INFERENCE_API should be set")
   118  		}
   119  		uriPassage = uriCommon
   120  		uriQuery = uriCommon
   121  	}
   122  
   123  	client := clients.New(uriPassage, uriQuery, timeout, logger)
   124  	if err := client.WaitForStartup(ctx, 1*time.Second); err != nil {
   125  		return errors.Wrap(err, "init remote vectorizer")
   126  	}
   127  
   128  	m.vectorizer = vectorizer.New(client)
   129  	m.metaProvider = client
   130  
   131  	return nil
   132  }
   133  
   134  func (m *TransformersModule) initAdditionalPropertiesProvider() error {
   135  	m.additionalPropertiesProvider = additional.NewText2VecProvider()
   136  	return nil
   137  }
   138  
   139  func (m *TransformersModule) RootHandler() http.Handler {
   140  	// TODO: remove once this is a capability interface
   141  	return nil
   142  }
   143  
   144  func (m *TransformersModule) VectorizeObject(ctx context.Context,
   145  	obj *models.Object, comp moduletools.VectorizablePropsComparator, cfg moduletools.ClassConfig,
   146  ) ([]float32, models.AdditionalProperties, error) {
   147  	return m.vectorizer.Object(ctx, obj, comp, cfg)
   148  }
   149  
   150  func (m *TransformersModule) MetaInfo() (map[string]interface{}, error) {
   151  	return m.metaProvider.MetaInfo()
   152  }
   153  
   154  func (m *TransformersModule) AdditionalProperties() map[string]modulecapabilities.AdditionalProperty {
   155  	return m.additionalPropertiesProvider.AdditionalProperties()
   156  }
   157  
   158  func (m *TransformersModule) VectorizeInput(ctx context.Context,
   159  	input string, cfg moduletools.ClassConfig,
   160  ) ([]float32, error) {
   161  	return m.vectorizer.Texts(ctx, []string{input}, cfg)
   162  }
   163  
   164  // verify we implement the modules.Module interface
   165  var (
   166  	_ = modulecapabilities.Module(New())
   167  	_ = modulecapabilities.Vectorizer(New())
   168  	_ = modulecapabilities.MetaProvider(New())
   169  )