github.com/weaviate/weaviate@v1.24.6/modules/multi2vec-clip/vectorizer/vectorizer.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 17 "github.com/pkg/errors" 18 19 "github.com/go-openapi/strfmt" 20 "github.com/weaviate/weaviate/entities/models" 21 "github.com/weaviate/weaviate/entities/moduletools" 22 "github.com/weaviate/weaviate/modules/multi2vec-clip/ent" 23 objectsvectorizer "github.com/weaviate/weaviate/usecases/modulecomponents/vectorizer" 24 libvectorizer "github.com/weaviate/weaviate/usecases/vectorizer" 25 ) 26 27 type Vectorizer struct { 28 client Client 29 objectVectorizer *objectsvectorizer.ObjectVectorizer 30 } 31 32 func New(client Client) *Vectorizer { 33 return &Vectorizer{ 34 client: client, 35 objectVectorizer: objectsvectorizer.New(), 36 } 37 } 38 39 type Client interface { 40 Vectorize(ctx context.Context, 41 texts, images []string, config ent.VectorizationConfig) (*ent.VectorizationResult, error) 42 } 43 44 type ClassSettings interface { 45 ImageField(property string) bool 46 ImageFieldsWeights() ([]float32, error) 47 TextField(property string) bool 48 TextFieldsWeights() ([]float32, error) 49 InferenceURL() string 50 } 51 52 func (v *Vectorizer) Object(ctx context.Context, object *models.Object, 53 comp moduletools.VectorizablePropsComparator, cfg moduletools.ClassConfig, 54 ) ([]float32, models.AdditionalProperties, error) { 55 vec, err := v.object(ctx, object.ID, comp, cfg) 56 return vec, nil, err 57 } 58 59 func (v *Vectorizer) VectorizeImage(ctx context.Context, id, image string, cfg moduletools.ClassConfig) ([]float32, error) { 60 res, err := v.client.Vectorize(ctx, []string{}, []string{image}, v.getVectorizationConfig(cfg)) 61 if err != nil { 62 return nil, err 63 } 64 if len(res.ImageVectors) != 1 { 65 return nil, errors.New("empty vector") 66 } 67 68 return res.ImageVectors[0], nil 69 } 70 71 func (v *Vectorizer) object(ctx context.Context, id strfmt.UUID, 72 comp moduletools.VectorizablePropsComparator, cfg moduletools.ClassConfig, 73 ) ([]float32, error) { 74 ichek := NewClassSettings(cfg) 75 prevVector := comp.PrevVector() 76 if cfg.TargetVector() != "" { 77 prevVector = comp.PrevVectorForName(cfg.TargetVector()) 78 } 79 80 vectorize := prevVector == nil 81 82 // vectorize image and text 83 texts := []string{} 84 images := []string{} 85 86 it := comp.PropsIterator() 87 for propName, propValue, ok := it.Next(); ok; propName, propValue, ok = it.Next() { 88 switch typed := propValue.(type) { 89 case string: 90 if ichek.ImageField(propName) { 91 vectorize = vectorize || comp.IsChanged(propName) 92 images = append(images, typed) 93 } 94 if ichek.TextField(propName) { 95 vectorize = vectorize || comp.IsChanged(propName) 96 texts = append(texts, typed) 97 } 98 99 case []string: 100 if ichek.TextField(propName) { 101 vectorize = vectorize || comp.IsChanged(propName) 102 texts = append(texts, typed...) 103 } 104 105 case nil: 106 if ichek.ImageField(propName) || ichek.TextField(propName) { 107 vectorize = vectorize || comp.IsChanged(propName) 108 } 109 } 110 } 111 112 // no property was changed, old vector can be used 113 if !vectorize { 114 return prevVector, nil 115 } 116 117 vectors := [][]float32{} 118 if len(texts) > 0 || len(images) > 0 { 119 res, err := v.client.Vectorize(ctx, texts, images, v.getVectorizationConfig(cfg)) 120 if err != nil { 121 return nil, err 122 } 123 vectors = append(vectors, res.TextVectors...) 124 vectors = append(vectors, res.ImageVectors...) 125 } 126 weights, err := v.getWeights(ichek) 127 if err != nil { 128 return nil, err 129 } 130 131 return libvectorizer.CombineVectorsWithWeights(vectors, weights), nil 132 } 133 134 func (v *Vectorizer) getWeights(ichek ClassSettings) ([]float32, error) { 135 weights := []float32{} 136 textFieldsWeights, err := ichek.TextFieldsWeights() 137 if err != nil { 138 return nil, err 139 } 140 imageFieldsWeights, err := ichek.ImageFieldsWeights() 141 if err != nil { 142 return nil, err 143 } 144 145 weights = append(weights, textFieldsWeights...) 146 weights = append(weights, imageFieldsWeights...) 147 148 normalizedWeights := v.normalizeWeights(weights) 149 150 return normalizedWeights, nil 151 } 152 153 func (v *Vectorizer) normalizeWeights(weights []float32) []float32 { 154 if len(weights) > 0 { 155 var denominator float32 156 for i := range weights { 157 denominator += weights[i] 158 } 159 normalizer := 1 / denominator 160 normalized := make([]float32, len(weights)) 161 for i := range weights { 162 normalized[i] = weights[i] * normalizer 163 } 164 return normalized 165 } 166 return nil 167 }