github.com/weaviate/weaviate@v1.24.6/modules/multi2vec-clip/vectorizer/vectorizer.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  
    17  	"github.com/pkg/errors"
    18  
    19  	"github.com/go-openapi/strfmt"
    20  	"github.com/weaviate/weaviate/entities/models"
    21  	"github.com/weaviate/weaviate/entities/moduletools"
    22  	"github.com/weaviate/weaviate/modules/multi2vec-clip/ent"
    23  	objectsvectorizer "github.com/weaviate/weaviate/usecases/modulecomponents/vectorizer"
    24  	libvectorizer "github.com/weaviate/weaviate/usecases/vectorizer"
    25  )
    26  
    27  type Vectorizer struct {
    28  	client           Client
    29  	objectVectorizer *objectsvectorizer.ObjectVectorizer
    30  }
    31  
    32  func New(client Client) *Vectorizer {
    33  	return &Vectorizer{
    34  		client:           client,
    35  		objectVectorizer: objectsvectorizer.New(),
    36  	}
    37  }
    38  
    39  type Client interface {
    40  	Vectorize(ctx context.Context,
    41  		texts, images []string, config ent.VectorizationConfig) (*ent.VectorizationResult, error)
    42  }
    43  
    44  type ClassSettings interface {
    45  	ImageField(property string) bool
    46  	ImageFieldsWeights() ([]float32, error)
    47  	TextField(property string) bool
    48  	TextFieldsWeights() ([]float32, error)
    49  	InferenceURL() string
    50  }
    51  
    52  func (v *Vectorizer) Object(ctx context.Context, object *models.Object,
    53  	comp moduletools.VectorizablePropsComparator, cfg moduletools.ClassConfig,
    54  ) ([]float32, models.AdditionalProperties, error) {
    55  	vec, err := v.object(ctx, object.ID, comp, cfg)
    56  	return vec, nil, err
    57  }
    58  
    59  func (v *Vectorizer) VectorizeImage(ctx context.Context, id, image string, cfg moduletools.ClassConfig) ([]float32, error) {
    60  	res, err := v.client.Vectorize(ctx, []string{}, []string{image}, v.getVectorizationConfig(cfg))
    61  	if err != nil {
    62  		return nil, err
    63  	}
    64  	if len(res.ImageVectors) != 1 {
    65  		return nil, errors.New("empty vector")
    66  	}
    67  
    68  	return res.ImageVectors[0], nil
    69  }
    70  
    71  func (v *Vectorizer) object(ctx context.Context, id strfmt.UUID,
    72  	comp moduletools.VectorizablePropsComparator, cfg moduletools.ClassConfig,
    73  ) ([]float32, error) {
    74  	ichek := NewClassSettings(cfg)
    75  	prevVector := comp.PrevVector()
    76  	if cfg.TargetVector() != "" {
    77  		prevVector = comp.PrevVectorForName(cfg.TargetVector())
    78  	}
    79  
    80  	vectorize := prevVector == nil
    81  
    82  	// vectorize image and text
    83  	texts := []string{}
    84  	images := []string{}
    85  
    86  	it := comp.PropsIterator()
    87  	for propName, propValue, ok := it.Next(); ok; propName, propValue, ok = it.Next() {
    88  		switch typed := propValue.(type) {
    89  		case string:
    90  			if ichek.ImageField(propName) {
    91  				vectorize = vectorize || comp.IsChanged(propName)
    92  				images = append(images, typed)
    93  			}
    94  			if ichek.TextField(propName) {
    95  				vectorize = vectorize || comp.IsChanged(propName)
    96  				texts = append(texts, typed)
    97  			}
    98  
    99  		case []string:
   100  			if ichek.TextField(propName) {
   101  				vectorize = vectorize || comp.IsChanged(propName)
   102  				texts = append(texts, typed...)
   103  			}
   104  
   105  		case nil:
   106  			if ichek.ImageField(propName) || ichek.TextField(propName) {
   107  				vectorize = vectorize || comp.IsChanged(propName)
   108  			}
   109  		}
   110  	}
   111  
   112  	// no property was changed, old vector can be used
   113  	if !vectorize {
   114  		return prevVector, nil
   115  	}
   116  
   117  	vectors := [][]float32{}
   118  	if len(texts) > 0 || len(images) > 0 {
   119  		res, err := v.client.Vectorize(ctx, texts, images, v.getVectorizationConfig(cfg))
   120  		if err != nil {
   121  			return nil, err
   122  		}
   123  		vectors = append(vectors, res.TextVectors...)
   124  		vectors = append(vectors, res.ImageVectors...)
   125  	}
   126  	weights, err := v.getWeights(ichek)
   127  	if err != nil {
   128  		return nil, err
   129  	}
   130  
   131  	return libvectorizer.CombineVectorsWithWeights(vectors, weights), nil
   132  }
   133  
   134  func (v *Vectorizer) getWeights(ichek ClassSettings) ([]float32, error) {
   135  	weights := []float32{}
   136  	textFieldsWeights, err := ichek.TextFieldsWeights()
   137  	if err != nil {
   138  		return nil, err
   139  	}
   140  	imageFieldsWeights, err := ichek.ImageFieldsWeights()
   141  	if err != nil {
   142  		return nil, err
   143  	}
   144  
   145  	weights = append(weights, textFieldsWeights...)
   146  	weights = append(weights, imageFieldsWeights...)
   147  
   148  	normalizedWeights := v.normalizeWeights(weights)
   149  
   150  	return normalizedWeights, nil
   151  }
   152  
   153  func (v *Vectorizer) normalizeWeights(weights []float32) []float32 {
   154  	if len(weights) > 0 {
   155  		var denominator float32
   156  		for i := range weights {
   157  			denominator += weights[i]
   158  		}
   159  		normalizer := 1 / denominator
   160  		normalized := make([]float32, len(weights))
   161  		for i := range weights {
   162  			normalized[i] = weights[i] * normalizer
   163  		}
   164  		return normalized
   165  	}
   166  	return nil
   167  }