github.com/weaviate/weaviate@v1.24.6/modules/multi2vec-clip/module.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package modclip
    13  
    14  import (
    15  	"context"
    16  	"net/http"
    17  	"os"
    18  	"time"
    19  
    20  	"github.com/pkg/errors"
    21  	"github.com/sirupsen/logrus"
    22  	"github.com/weaviate/weaviate/entities/models"
    23  	"github.com/weaviate/weaviate/entities/modulecapabilities"
    24  	"github.com/weaviate/weaviate/entities/moduletools"
    25  	"github.com/weaviate/weaviate/modules/multi2vec-clip/clients"
    26  	"github.com/weaviate/weaviate/modules/multi2vec-clip/vectorizer"
    27  )
    28  
    29  func New() *ClipModule {
    30  	return &ClipModule{}
    31  }
    32  
    33  type ClipModule struct {
    34  	imageVectorizer          imageVectorizer
    35  	nearImageGraphqlProvider modulecapabilities.GraphQLArguments
    36  	nearImageSearcher        modulecapabilities.Searcher
    37  	textVectorizer           textVectorizer
    38  	nearTextGraphqlProvider  modulecapabilities.GraphQLArguments
    39  	nearTextSearcher         modulecapabilities.Searcher
    40  	nearTextTransformer      modulecapabilities.TextTransform
    41  	metaClient               metaClient
    42  }
    43  
    44  type metaClient interface {
    45  	MetaInfo() (map[string]interface{}, error)
    46  }
    47  
    48  type imageVectorizer interface {
    49  	Object(ctx context.Context, obj *models.Object, comp moduletools.VectorizablePropsComparator,
    50  		cfg moduletools.ClassConfig) ([]float32, models.AdditionalProperties, error)
    51  	VectorizeImage(ctx context.Context, id, image string, cfg moduletools.ClassConfig) ([]float32, error)
    52  }
    53  
    54  type textVectorizer interface {
    55  	Texts(ctx context.Context, input []string,
    56  		cfg moduletools.ClassConfig) ([]float32, error)
    57  }
    58  
    59  func (m *ClipModule) Name() string {
    60  	return "multi2vec-clip"
    61  }
    62  
    63  func (m *ClipModule) Type() modulecapabilities.ModuleType {
    64  	return modulecapabilities.Multi2Vec
    65  }
    66  
    67  func (m *ClipModule) Init(ctx context.Context,
    68  	params moduletools.ModuleInitParams,
    69  ) error {
    70  	if err := m.initVectorizer(ctx, params.GetConfig().ModuleHttpClientTimeout, params.GetLogger()); err != nil {
    71  		return errors.Wrap(err, "init vectorizer")
    72  	}
    73  
    74  	if err := m.initNearImage(); err != nil {
    75  		return errors.Wrap(err, "init near text")
    76  	}
    77  
    78  	return nil
    79  }
    80  
    81  func (m *ClipModule) InitExtension(modules []modulecapabilities.Module) error {
    82  	for _, module := range modules {
    83  		if module.Name() == m.Name() {
    84  			continue
    85  		}
    86  		if arg, ok := module.(modulecapabilities.TextTransformers); ok {
    87  			if arg != nil && arg.TextTransformers() != nil {
    88  				m.nearTextTransformer = arg.TextTransformers()["nearText"]
    89  			}
    90  		}
    91  	}
    92  
    93  	if err := m.initNearText(); err != nil {
    94  		return errors.Wrap(err, "init near text")
    95  	}
    96  
    97  	return nil
    98  }
    99  
   100  func (m *ClipModule) initVectorizer(ctx context.Context, timeout time.Duration,
   101  	logger logrus.FieldLogger,
   102  ) error {
   103  	uri := os.Getenv("CLIP_INFERENCE_API")
   104  	if uri == "" {
   105  		return errors.Errorf("required variable CLIP_INFERENCE_API is not set")
   106  	}
   107  
   108  	client := clients.New(uri, timeout, logger)
   109  	if err := client.WaitForStartup(ctx, 1*time.Second); err != nil {
   110  		return errors.Wrap(err, "init remote vectorizer")
   111  	}
   112  
   113  	m.imageVectorizer = vectorizer.New(client)
   114  	m.textVectorizer = vectorizer.New(client)
   115  	m.metaClient = client
   116  
   117  	return nil
   118  }
   119  
   120  func (m *ClipModule) RootHandler() http.Handler {
   121  	// TODO: remove once this is a capability interface
   122  	return nil
   123  }
   124  
   125  func (m *ClipModule) VectorizeObject(ctx context.Context,
   126  	obj *models.Object, comp moduletools.VectorizablePropsComparator, cfg moduletools.ClassConfig,
   127  ) ([]float32, models.AdditionalProperties, error) {
   128  	return m.imageVectorizer.Object(ctx, obj, comp, cfg)
   129  }
   130  
   131  func (m *ClipModule) MetaInfo() (map[string]interface{}, error) {
   132  	return m.metaClient.MetaInfo()
   133  }
   134  
   135  func (m *ClipModule) VectorizeInput(ctx context.Context,
   136  	input string, cfg moduletools.ClassConfig,
   137  ) ([]float32, error) {
   138  	return m.textVectorizer.Texts(ctx, []string{input}, cfg)
   139  }
   140  
   141  // verify we implement the modules.Module interface
   142  var (
   143  	_ = modulecapabilities.Module(New())
   144  	_ = modulecapabilities.Vectorizer(New())
   145  	_ = modulecapabilities.InputVectorizer(New())
   146  )