github.com/weaviate/weaviate@v1.24.6/modules/text2vec-palm/vectorizer/class_settings.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "fmt" 16 "strings" 17 18 "github.com/pkg/errors" 19 20 "github.com/weaviate/weaviate/entities/models" 21 "github.com/weaviate/weaviate/entities/moduletools" 22 "github.com/weaviate/weaviate/entities/schema" 23 basesettings "github.com/weaviate/weaviate/usecases/modulecomponents/settings" 24 ) 25 26 const ( 27 apiEndpointProperty = "apiEndpoint" 28 projectIDProperty = "projectId" 29 modelIDProperty = "modelId" 30 titleProperty = "titleProperty" 31 ) 32 33 const ( 34 DefaultVectorizeClassName = false 35 DefaultPropertyIndexed = true 36 DefaultVectorizePropertyName = false 37 DefaultApiEndpoint = "us-central1-aiplatform.googleapis.com" 38 DefaultModelID = "textembedding-gecko@001" 39 DefaulGenerativeAIApiEndpoint = "generativelanguage.googleapis.com" 40 DefaulGenerativeAIModelID = "embedding-gecko-001" 41 ) 42 43 var availablePalmModels = []string{ 44 DefaultModelID, 45 "textembedding-gecko@latest", 46 "textembedding-gecko-multilingual@latest", 47 "textembedding-gecko@003", 48 "textembedding-gecko@002", 49 "textembedding-gecko-multilingual@001", 50 "textembedding-gecko@001", 51 } 52 53 var availableGenerativeAIModels = []string{ 54 DefaulGenerativeAIModelID, 55 } 56 57 type classSettings struct { 58 basesettings.BaseClassSettings 59 cfg moduletools.ClassConfig 60 } 61 62 func NewClassSettings(cfg moduletools.ClassConfig) *classSettings { 63 return &classSettings{cfg: cfg, BaseClassSettings: *basesettings.NewBaseClassSettings(cfg)} 64 } 65 66 func (ic *classSettings) Validate(class *models.Class) error { 67 if ic.cfg == nil { 68 // we would receive a nil-config on cross-class requests, such as Explore{} 69 return errors.New("empty config") 70 } 71 72 var errorMessages []string 73 74 if err := ic.BaseClassSettings.Validate(); err != nil { 75 errorMessages = append(errorMessages, err.Error()) 76 } 77 78 apiEndpoint := ic.ApiEndpoint() 79 model := ic.ModelID() 80 if apiEndpoint == DefaulGenerativeAIApiEndpoint { 81 if model != "" && !ic.validatePalmSetting(model, availableGenerativeAIModels) { 82 errorMessages = append(errorMessages, fmt.Sprintf("wrong %s available Generative AI model names are: %v", modelIDProperty, availableGenerativeAIModels)) 83 } 84 } else { 85 projectID := ic.ProjectID() 86 if projectID == "" { 87 errorMessages = append(errorMessages, fmt.Sprintf("%s cannot be empty", projectIDProperty)) 88 } 89 if model != "" && !ic.validatePalmSetting(model, availablePalmModels) { 90 errorMessages = append(errorMessages, fmt.Sprintf("wrong %s available model names are: %v", modelIDProperty, availablePalmModels)) 91 } 92 } 93 94 if len(errorMessages) > 0 { 95 return fmt.Errorf("%s", strings.Join(errorMessages, ", ")) 96 } 97 98 err := ic.validateIndexState(class, ic) 99 if err != nil { 100 return err 101 } 102 103 return nil 104 } 105 106 func (ic *classSettings) validatePalmSetting(value string, availableValues []string) bool { 107 for i := range availableValues { 108 if value == availableValues[i] { 109 return true 110 } 111 } 112 return false 113 } 114 115 func (ic *classSettings) getStringProperty(name, defaultValue string) string { 116 return ic.BaseClassSettings.GetPropertyAsString(name, defaultValue) 117 } 118 119 func (cv *classSettings) validateIndexState(class *models.Class, settings ClassSettings) error { 120 if settings.VectorizeClassName() { 121 // if the user chooses to vectorize the classname, vector-building will 122 // always be possible, no need to investigate further 123 124 return nil 125 } 126 127 // search if there is at least one indexed, string/text prop. If found pass 128 // validation 129 for _, prop := range class.Properties { 130 if len(prop.DataType) < 1 { 131 return errors.Errorf("property %s must have at least one datatype: "+ 132 "got %v", prop.Name, prop.DataType) 133 } 134 135 if prop.DataType[0] != string(schema.DataTypeString) && 136 prop.DataType[0] != string(schema.DataTypeText) { 137 // we can only vectorize text-like props 138 continue 139 } 140 141 if settings.PropertyIndexed(prop.Name) { 142 // found at least one, this is a valid schema 143 return nil 144 } 145 } 146 147 return fmt.Errorf("invalid properties: didn't find a single property which is " + 148 "of type string or text and is not excluded from indexing. In addition the " + 149 "class name is excluded from vectorization as well, meaning that it cannot be " + 150 "used to determine the vector position. To fix this, set 'vectorizeClassName' " + 151 "to true if the class name is contextionary-valid. Alternatively add at least " + 152 "contextionary-valid text/string property which is not excluded from " + 153 "indexing") 154 } 155 156 func (ic *classSettings) getDefaultModel(apiEndpoint string) string { 157 if apiEndpoint == DefaulGenerativeAIApiEndpoint { 158 return DefaulGenerativeAIModelID 159 } 160 return DefaultModelID 161 } 162 163 // PaLM params 164 func (ic *classSettings) ApiEndpoint() string { 165 return ic.getStringProperty(apiEndpointProperty, DefaultApiEndpoint) 166 } 167 168 func (ic *classSettings) ProjectID() string { 169 return ic.getStringProperty(projectIDProperty, "") 170 } 171 172 func (ic *classSettings) ModelID() string { 173 return ic.getStringProperty(modelIDProperty, ic.getDefaultModel(ic.ApiEndpoint())) 174 } 175 176 func (ic *classSettings) TitleProperty() string { 177 return ic.getStringProperty(titleProperty, "") 178 }