github.com/weaviate/weaviate@v1.24.6/usecases/modulecomponents/vectorizer/object_texts.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"strings"
    18  
    19  	"github.com/fatih/camelcase"
    20  	"github.com/weaviate/weaviate/entities/moduletools"
    21  )
    22  
    23  type ClassSettings interface {
    24  	PropertyIndexed(property string) bool
    25  	VectorizePropertyName(propertyName string) bool
    26  	VectorizeClassName() bool
    27  	Properties() []string
    28  }
    29  
    30  type ObjectVectorizer struct{}
    31  
    32  func New() *ObjectVectorizer {
    33  	return &ObjectVectorizer{}
    34  }
    35  
    36  func (v *ObjectVectorizer) TextsOrVector(ctx context.Context, className string,
    37  	comp moduletools.VectorizablePropsComparator, icheck ClassSettings, targetVector string,
    38  ) (string, []float32) {
    39  	text, _, vector := v.TextsOrVectorWithTitleProperty(ctx, className, comp, icheck, "", targetVector)
    40  	return text, vector
    41  }
    42  
    43  func (v *ObjectVectorizer) camelCaseToLower(in string) string {
    44  	parts := camelcase.Split(in)
    45  	var sb strings.Builder
    46  	for i, part := range parts {
    47  		if part == " " {
    48  			continue
    49  		}
    50  
    51  		if i > 0 {
    52  			sb.WriteString(" ")
    53  		}
    54  
    55  		sb.WriteString(strings.ToLower(part))
    56  	}
    57  
    58  	return sb.String()
    59  }
    60  
    61  func (v *ObjectVectorizer) TextsOrVectorWithTitleProperty(ctx context.Context, className string,
    62  	comp moduletools.VectorizablePropsComparator, icheck ClassSettings, titlePopertyName string,
    63  	targetVector string,
    64  ) (string, string, []float32) {
    65  	prevVector := comp.PrevVector()
    66  	if targetVector != "" {
    67  		prevVector = comp.PrevVectorForName(targetVector)
    68  	}
    69  
    70  	vectorize := prevVector == nil
    71  
    72  	var titlePropertyValue []string
    73  	var corpi []string
    74  
    75  	if icheck.VectorizeClassName() {
    76  		corpi = append(corpi, v.camelCaseToLower(className))
    77  	}
    78  
    79  	it := comp.PropsIterator()
    80  	for propName, value, ok := it.Next(); ok; propName, value, ok = it.Next() {
    81  		if !icheck.PropertyIndexed(propName) {
    82  			continue
    83  		}
    84  
    85  		switch typed := value.(type) {
    86  		case string:
    87  			vectorize = vectorize || comp.IsChanged(propName)
    88  			isTitleProperty := propName == titlePopertyName
    89  
    90  			str := strings.ToLower(typed)
    91  			if isTitleProperty {
    92  				titlePropertyValue = append(titlePropertyValue, str)
    93  			}
    94  			if icheck.VectorizePropertyName(propName) {
    95  				str = fmt.Sprintf("%s %s", v.camelCaseToLower(propName), str)
    96  			}
    97  			corpi = append(corpi, str)
    98  
    99  		case []string:
   100  			vectorize = vectorize || comp.IsChanged(propName)
   101  
   102  			if len(typed) > 0 {
   103  				isNameVectorizable := icheck.VectorizePropertyName(propName)
   104  				lowerPropertyName := v.camelCaseToLower(propName)
   105  				isTitleProperty := propName == titlePopertyName
   106  
   107  				for i := range typed {
   108  					str := strings.ToLower(typed[i])
   109  					if isTitleProperty {
   110  						titlePropertyValue = append(titlePropertyValue, str)
   111  					}
   112  					if isNameVectorizable {
   113  						str = fmt.Sprintf("%s %s", lowerPropertyName, str)
   114  					}
   115  					corpi = append(corpi, str)
   116  				}
   117  			}
   118  
   119  		case nil:
   120  			vectorize = vectorize || comp.IsChanged(propName)
   121  		}
   122  	}
   123  
   124  	// no property was changed, old vector can be used
   125  	if !vectorize {
   126  		return "", "", prevVector
   127  	}
   128  
   129  	if len(corpi) == 0 {
   130  		// fall back to using the class name
   131  		corpi = append(corpi, v.camelCaseToLower(className))
   132  	}
   133  
   134  	text := strings.Join(corpi, " ")
   135  	if titlePopertyName == "" {
   136  		return text, "", nil
   137  	}
   138  	titlePropertyVal := strings.Join(titlePropertyValue, " ")
   139  	return text, titlePropertyVal, nil
   140  }