github.com/weaviate/weaviate@v1.24.6/usecases/modulecomponents/vectorizer/object_texts.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "context" 16 "fmt" 17 "strings" 18 19 "github.com/fatih/camelcase" 20 "github.com/weaviate/weaviate/entities/moduletools" 21 ) 22 23 type ClassSettings interface { 24 PropertyIndexed(property string) bool 25 VectorizePropertyName(propertyName string) bool 26 VectorizeClassName() bool 27 Properties() []string 28 } 29 30 type ObjectVectorizer struct{} 31 32 func New() *ObjectVectorizer { 33 return &ObjectVectorizer{} 34 } 35 36 func (v *ObjectVectorizer) TextsOrVector(ctx context.Context, className string, 37 comp moduletools.VectorizablePropsComparator, icheck ClassSettings, targetVector string, 38 ) (string, []float32) { 39 text, _, vector := v.TextsOrVectorWithTitleProperty(ctx, className, comp, icheck, "", targetVector) 40 return text, vector 41 } 42 43 func (v *ObjectVectorizer) camelCaseToLower(in string) string { 44 parts := camelcase.Split(in) 45 var sb strings.Builder 46 for i, part := range parts { 47 if part == " " { 48 continue 49 } 50 51 if i > 0 { 52 sb.WriteString(" ") 53 } 54 55 sb.WriteString(strings.ToLower(part)) 56 } 57 58 return sb.String() 59 } 60 61 func (v *ObjectVectorizer) TextsOrVectorWithTitleProperty(ctx context.Context, className string, 62 comp moduletools.VectorizablePropsComparator, icheck ClassSettings, titlePopertyName string, 63 targetVector string, 64 ) (string, string, []float32) { 65 prevVector := comp.PrevVector() 66 if targetVector != "" { 67 prevVector = comp.PrevVectorForName(targetVector) 68 } 69 70 vectorize := prevVector == nil 71 72 var titlePropertyValue []string 73 var corpi []string 74 75 if icheck.VectorizeClassName() { 76 corpi = append(corpi, v.camelCaseToLower(className)) 77 } 78 79 it := comp.PropsIterator() 80 for propName, value, ok := it.Next(); ok; propName, value, ok = it.Next() { 81 if !icheck.PropertyIndexed(propName) { 82 continue 83 } 84 85 switch typed := value.(type) { 86 case string: 87 vectorize = vectorize || comp.IsChanged(propName) 88 isTitleProperty := propName == titlePopertyName 89 90 str := strings.ToLower(typed) 91 if isTitleProperty { 92 titlePropertyValue = append(titlePropertyValue, str) 93 } 94 if icheck.VectorizePropertyName(propName) { 95 str = fmt.Sprintf("%s %s", v.camelCaseToLower(propName), str) 96 } 97 corpi = append(corpi, str) 98 99 case []string: 100 vectorize = vectorize || comp.IsChanged(propName) 101 102 if len(typed) > 0 { 103 isNameVectorizable := icheck.VectorizePropertyName(propName) 104 lowerPropertyName := v.camelCaseToLower(propName) 105 isTitleProperty := propName == titlePopertyName 106 107 for i := range typed { 108 str := strings.ToLower(typed[i]) 109 if isTitleProperty { 110 titlePropertyValue = append(titlePropertyValue, str) 111 } 112 if isNameVectorizable { 113 str = fmt.Sprintf("%s %s", lowerPropertyName, str) 114 } 115 corpi = append(corpi, str) 116 } 117 } 118 119 case nil: 120 vectorize = vectorize || comp.IsChanged(propName) 121 } 122 } 123 124 // no property was changed, old vector can be used 125 if !vectorize { 126 return "", "", prevVector 127 } 128 129 if len(corpi) == 0 { 130 // fall back to using the class name 131 corpi = append(corpi, v.camelCaseToLower(className)) 132 } 133 134 text := strings.Join(corpi, " ") 135 if titlePopertyName == "" { 136 return text, "", nil 137 } 138 titlePropertyVal := strings.Join(titlePropertyValue, " ") 139 return text, titlePropertyVal, nil 140 }