github.com/weaviate/weaviate@v1.24.6/modules/text2vec-openai/config.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package modopenai 13 14 import ( 15 "context" 16 17 "github.com/weaviate/weaviate/entities/models" 18 "github.com/weaviate/weaviate/entities/modulecapabilities" 19 "github.com/weaviate/weaviate/entities/moduletools" 20 "github.com/weaviate/weaviate/entities/schema" 21 "github.com/weaviate/weaviate/modules/text2vec-openai/vectorizer" 22 ) 23 24 func (m *OpenAIModule) ClassConfigDefaults() map[string]interface{} { 25 return map[string]interface{}{ 26 "vectorizeClassName": vectorizer.DefaultVectorizeClassName, 27 "baseURL": vectorizer.DefaultBaseURL, 28 "model": vectorizer.DefaultOpenAIModel, 29 } 30 } 31 32 func (m *OpenAIModule) PropertyConfigDefaults( 33 dt *schema.DataType, 34 ) map[string]interface{} { 35 return map[string]interface{}{ 36 "skip": !vectorizer.DefaultPropertyIndexed, 37 "vectorizePropertyName": vectorizer.DefaultVectorizePropertyName, 38 } 39 } 40 41 func (m *OpenAIModule) ValidateClass(ctx context.Context, 42 class *models.Class, cfg moduletools.ClassConfig, 43 ) error { 44 settings := vectorizer.NewClassSettings(cfg) 45 return settings.Validate(class) 46 } 47 48 var _ = modulecapabilities.ClassConfigurator(New()) 49 50 // type ConfigValidator struct { 51 // logger logrus.FieldLogger 52 // } 53 54 // type ClassSettings interface { 55 // VectorizeClassName() bool 56 // VectorizePropertyName(propName string) bool 57 // PropertyIndexed(propName string) bool 58 // } 59 60 // func NewConfigValidator(logger logrus.FieldLogger) *ConfigValidator { 61 // return &ConfigValidator{logger: logger} 62 // } 63 64 // func (cv *ConfigValidator) Do(ctx context.Context, class *models.Class, 65 // cfg moduletools.ClassConfig, settings ClassSettings) error { 66 // // In text2vec-openai (as opposed to e.g. text2vec-contextionary) the 67 // // assumption is that the models will be able to deal with any words, even 68 // // previously unseen ones. Therefore we do not need to validate individual 69 // // properties, but only the overall "index state" 70 71 // if err := cv.validateIndexState(ctx, class, settings); err != nil { 72 // return errors.Errorf("invalid combination of properties") 73 // } 74 75 // cv.checkForPossibilityOfDuplicateVectors(ctx, class, settings) 76 77 // return nil 78 // } 79 80 // func (cv *ConfigValidator) validateIndexState(ctx context.Context, 81 // class *models.Class, settings ClassSettings) error { 82 // if settings.VectorizeClassName() { 83 // // if the user chooses to vectorize the classname, vector-building will 84 // // always be possible, no need to investigate further 85 86 // return nil 87 // } 88 89 // // search if there is at least one indexed, string/text prop. If found pass 90 // // validation 91 // for _, prop := range class.Properties { 92 // if len(prop.DataType) < 1 { 93 // return errors.Errorf("property %s must have at least one datatype: "+ 94 // "got %v", prop.Name, prop.DataType) 95 // } 96 97 // if prop.DataType[0] != string(schema.DataTypeText) { 98 // // we can only vectorize text-like props 99 // continue 100 // } 101 102 // if settings.PropertyIndexed(prop.Name) { 103 // // found at least one, this is a valid schema 104 // return nil 105 // } 106 // } 107 108 // return fmt.Errorf("invalid properties: didn't find a single property which is " + 109 // "of type string or text and is not excluded from indexing. In addition the " + 110 // "class name is excluded from vectorization as well, meaning that it cannot be " + 111 // "used to determine the vector position. To fix this, set 'vectorizeClassName' " + 112 // "to true if the class name is contextionary-valid. Alternatively add at least " + 113 // "contextionary-valid text/string property which is not excluded from " + 114 // "indexing.") 115 // } 116 117 // func (cv *ConfigValidator) checkForPossibilityOfDuplicateVectors( 118 // ctx context.Context, class *models.Class, settings ClassSettings) { 119 // if !settings.VectorizeClassName() { 120 // // if the user choses not to vectorize the class name, this means they must 121 // // have chosen something else to vectorize, otherwise the validation would 122 // // have error'd before we ever got here. We can skip further checking. 123 124 // return 125 // } 126 127 // // search if there is at least one indexed, string/text prop. If found exit 128 // for _, prop := range class.Properties { 129 // // length check skipped, because validation has already passed 130 // if prop.DataType[0] != string(schema.DataTypeText) { 131 // // we can only vectorize text-like props 132 // continue 133 // } 134 135 // if settings.PropertyIndexed(prop.Name) { 136 // // found at least one 137 // return 138 // } 139 // } 140 141 // cv.logger.WithField("module", "text2vec-openai"). 142 // WithField("class", class.Class). 143 // Warnf("text2vec-openai: Class %q does not have any properties "+ 144 // "indexed (or only non text-properties indexed) and the vector position is "+ 145 // "only determined by the class name. Each object will end up with the same "+ 146 // "vector which leads to a severe performance penalty on imports. Consider "+ 147 // "setting vectorIndexConfig.skip=true for this property", class.Class) 148 // }