github.com/weaviate/weaviate@v1.24.6/modules/text2vec-aws/vectorizer/class_settings.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "fmt" 16 "strings" 17 18 "github.com/pkg/errors" 19 20 "github.com/weaviate/weaviate/entities/models" 21 "github.com/weaviate/weaviate/entities/moduletools" 22 "github.com/weaviate/weaviate/entities/schema" 23 basesettings "github.com/weaviate/weaviate/usecases/modulecomponents/settings" 24 ) 25 26 const ( 27 serviceProperty = "service" 28 regionProperty = "region" 29 modelProperty = "model" 30 endpointProperty = "endpoint" 31 targetModelProperty = "targetModel" 32 targetVariantProperty = "targetVariant" 33 ) 34 35 const ( 36 DefaultVectorizeClassName = false 37 DefaultPropertyIndexed = true 38 DefaultVectorizePropertyName = false 39 DefaultService = "bedrock" 40 ) 41 42 var availableAWSServices = []string{ 43 "bedrock", 44 "sagemaker", 45 } 46 47 var availableAWSBedrockModels = []string{ 48 "amazon.titan-embed-text-v1", 49 "cohere.embed-english-v3", 50 "cohere.embed-multilingual-v3", 51 } 52 53 type classSettings struct { 54 basesettings.BaseClassSettings 55 cfg moduletools.ClassConfig 56 } 57 58 func NewClassSettings(cfg moduletools.ClassConfig) *classSettings { 59 return &classSettings{cfg: cfg, BaseClassSettings: *basesettings.NewBaseClassSettings(cfg)} 60 } 61 62 func (ic *classSettings) Validate(class *models.Class) error { 63 if ic.cfg == nil { 64 // we would receive a nil-config on cross-class requests, such as Explore{} 65 return errors.New("empty config") 66 } 67 68 var errorMessages []string 69 70 if err := ic.BaseClassSettings.Validate(); err != nil { 71 errorMessages = append(errorMessages, err.Error()) 72 } 73 74 service := ic.Service() 75 if service == "" || !ic.validatAvailableAWSSetting(service, availableAWSServices) { 76 errorMessages = append(errorMessages, fmt.Sprintf("wrong %s, available services are: %v", serviceProperty, availableAWSServices)) 77 } 78 region := ic.Region() 79 if region == "" { 80 errorMessages = append(errorMessages, fmt.Sprintf("%s cannot be empty", regionProperty)) 81 } 82 83 if isBedrock(service) { 84 model := ic.Model() 85 if model == "" || !ic.validatAvailableAWSSetting(model, availableAWSBedrockModels) { 86 errorMessages = append(errorMessages, fmt.Sprintf("wrong %s, available models are: %v", modelProperty, availableAWSBedrockModels)) 87 } 88 endpoint := ic.Endpoint() 89 if endpoint != "" { 90 errorMessages = append(errorMessages, fmt.Sprintf("wrong configuration: %s, not applicable to %s", endpoint, service)) 91 } 92 } 93 94 if isSagemaker(service) { 95 endpoint := ic.Endpoint() 96 if endpoint == "" { 97 errorMessages = append(errorMessages, fmt.Sprintf("%s cannot be empty", endpointProperty)) 98 } 99 model := ic.Model() 100 if model != "" { 101 errorMessages = append(errorMessages, fmt.Sprintf("wrong configuration: %s, not applicable to %s. did you mean %s", modelProperty, service, targetModelProperty)) 102 } 103 } 104 105 if len(errorMessages) > 0 { 106 return fmt.Errorf("%s", strings.Join(errorMessages, ", ")) 107 } 108 109 err := ic.validateIndexState(class, ic) 110 if err != nil { 111 return err 112 } 113 114 return nil 115 } 116 117 func (ic *classSettings) validatAvailableAWSSetting(value string, availableValues []string) bool { 118 for i := range availableValues { 119 if value == availableValues[i] { 120 return true 121 } 122 } 123 return false 124 } 125 126 func (ic *classSettings) getStringProperty(name, defaultValue string) string { 127 return ic.BaseClassSettings.GetPropertyAsString(name, defaultValue) 128 } 129 130 func (cv *classSettings) validateIndexState(class *models.Class, settings ClassSettings) error { 131 if settings.VectorizeClassName() { 132 // if the user chooses to vectorize the classname, vector-building will 133 // always be possible, no need to investigate further 134 135 return nil 136 } 137 138 // search if there is at least one indexed, string/text prop. If found pass 139 // validation 140 for _, prop := range class.Properties { 141 if len(prop.DataType) < 1 { 142 return errors.Errorf("property %s must have at least one datatype: "+ 143 "got %v", prop.Name, prop.DataType) 144 } 145 146 if prop.DataType[0] != string(schema.DataTypeString) && 147 prop.DataType[0] != string(schema.DataTypeText) { 148 // we can only vectorize text-like props 149 continue 150 } 151 152 if settings.PropertyIndexed(prop.Name) { 153 // found at least one, this is a valid schema 154 return nil 155 } 156 } 157 158 return fmt.Errorf("invalid properties: didn't find a single property which is " + 159 "of type string or text and is not excluded from indexing. In addition the " + 160 "class name is excluded from vectorization as well, meaning that it cannot be " + 161 "used to determine the vector position. To fix this, set 'vectorizeClassName' " + 162 "to true if the class name is contextionary-valid. Alternatively add at least " + 163 "contextionary-valid text/string property which is not excluded from " + 164 "indexing") 165 } 166 167 // Aws params 168 func (ic *classSettings) Service() string { 169 return ic.getStringProperty(serviceProperty, DefaultService) 170 } 171 172 func (ic *classSettings) Region() string { 173 return ic.getStringProperty(regionProperty, "") 174 } 175 176 func (ic *classSettings) Model() string { 177 return ic.getStringProperty(modelProperty, "") 178 } 179 180 func (ic *classSettings) Endpoint() string { 181 return ic.getStringProperty(endpointProperty, "") 182 } 183 184 func (ic *classSettings) TargetModel() string { 185 return ic.getStringProperty(targetModelProperty, "") 186 } 187 188 func (ic *classSettings) TargetVariant() string { 189 return ic.getStringProperty(targetVariantProperty, "") 190 } 191 192 func isSagemaker(service string) bool { 193 return service == "sagemaker" 194 } 195 196 func isBedrock(service string) bool { 197 return service == "bedrock" 198 }