github.com/weaviate/weaviate@v1.24.6/modules/text2vec-cohere/vectorizer/class_settings.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "fmt" 16 17 "github.com/pkg/errors" 18 19 "github.com/weaviate/weaviate/entities/models" 20 "github.com/weaviate/weaviate/entities/moduletools" 21 "github.com/weaviate/weaviate/entities/schema" 22 basesettings "github.com/weaviate/weaviate/usecases/modulecomponents/settings" 23 ) 24 25 const ( 26 DefaultBaseURL = "https://api.cohere.ai" 27 DefaultCohereModel = "embed-multilingual-v3.0" 28 DefaultTruncate = "END" 29 DefaultVectorizeClassName = true 30 DefaultPropertyIndexed = true 31 DefaultVectorizePropertyName = false 32 ) 33 34 var ( 35 availableCohereModels = []string{ 36 "medium", 37 "large", "small", "multilingual-22-12", 38 "embed-english-v2.0", "embed-english-light-v2.0", "embed-multilingual-v2.0", 39 "embed-english-v3.0", "embed-english-light-v3.0", "embed-multilingual-v3.0", "embed-multilingual-light-v3.0", 40 } 41 experimetnalCohereModels = []string{"multilingual-2210-alpha"} 42 availableTruncates = []string{"NONE", "START", "END", "LEFT", "RIGHT"} 43 ) 44 45 type classSettings struct { 46 basesettings.BaseClassSettings 47 cfg moduletools.ClassConfig 48 } 49 50 func NewClassSettings(cfg moduletools.ClassConfig) *classSettings { 51 return &classSettings{cfg: cfg, BaseClassSettings: *basesettings.NewBaseClassSettings(cfg)} 52 } 53 54 func (cs *classSettings) Model() string { 55 return cs.getProperty("model", DefaultCohereModel) 56 } 57 58 func (cs *classSettings) Truncate() string { 59 return cs.getProperty("truncate", DefaultTruncate) 60 } 61 62 func (cs *classSettings) BaseURL() string { 63 return cs.getProperty("baseURL", DefaultBaseURL) 64 } 65 66 func (cs *classSettings) Validate(class *models.Class) error { 67 if cs.cfg == nil { 68 // we would receive a nil-config on cross-class requests, such as Explore{} 69 return errors.New("empty config") 70 } 71 72 if err := cs.BaseClassSettings.Validate(); err != nil { 73 return err 74 } 75 76 model := cs.Model() 77 if !cs.validateCohereSetting(model, append(availableCohereModels, experimetnalCohereModels...)) { 78 return errors.Errorf("wrong Cohere model name, available model names are: %v", availableCohereModels) 79 } 80 truncate := cs.Truncate() 81 if !cs.validateCohereSetting(truncate, availableTruncates) { 82 return errors.Errorf("wrong truncate type, available types are: %v", availableTruncates) 83 } 84 85 err := cs.validateIndexState(class, cs) 86 if err != nil { 87 return err 88 } 89 90 return nil 91 } 92 93 func (cs *classSettings) validateCohereSetting(value string, availableValues []string) bool { 94 for i := range availableValues { 95 if value == availableValues[i] { 96 return true 97 } 98 } 99 return false 100 } 101 102 func (cs *classSettings) getProperty(name, defaultValue string) string { 103 return cs.BaseClassSettings.GetPropertyAsString(name, defaultValue) 104 } 105 106 func (cs *classSettings) validateIndexState(class *models.Class, settings ClassSettings) error { 107 if settings.VectorizeClassName() { 108 // if the user chooses to vectorize the classname, vector-building will 109 // always be possible, no need to investigate further 110 111 return nil 112 } 113 114 // search if there is at least one indexed, string/text prop. If found pass 115 // validation 116 for _, prop := range class.Properties { 117 if len(prop.DataType) < 1 { 118 return errors.Errorf("property %s must have at least one datatype: "+ 119 "got %v", prop.Name, prop.DataType) 120 } 121 122 if prop.DataType[0] != string(schema.DataTypeText) { 123 // we can only vectorize text-like props 124 continue 125 } 126 127 if settings.PropertyIndexed(prop.Name) { 128 // found at least one, this is a valid schema 129 return nil 130 } 131 } 132 133 return fmt.Errorf("invalid properties: didn't find a single property which is " + 134 "of type string or text and is not excluded from indexing. In addition the " + 135 "class name is excluded from vectorization as well, meaning that it cannot be " + 136 "used to determine the vector position. To fix this, set 'vectorizeClassName' " + 137 "to true if the class name is contextionary-valid. Alternatively add at least " + 138 "contextionary-valid text/string property which is not excluded from " + 139 "indexing") 140 }