github.com/weaviate/weaviate@v1.24.6/modules/text2vec-jinaai/vectorizer/class_settings.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package vectorizer 13 14 import ( 15 "fmt" 16 17 "github.com/pkg/errors" 18 19 "github.com/weaviate/weaviate/entities/models" 20 "github.com/weaviate/weaviate/entities/moduletools" 21 "github.com/weaviate/weaviate/entities/schema" 22 basesettings "github.com/weaviate/weaviate/usecases/modulecomponents/settings" 23 ) 24 25 const ( 26 DefaultJinaAIDocumentType = "text" 27 DefaultJinaAIModel = "jina-embeddings-v2-base-en" 28 DefaultVectorizeClassName = true 29 DefaultPropertyIndexed = true 30 DefaultVectorizePropertyName = false 31 DefaultBaseURL = "https://api.jina.ai" 32 ) 33 34 type classSettings struct { 35 basesettings.BaseClassSettings 36 cfg moduletools.ClassConfig 37 } 38 39 func NewClassSettings(cfg moduletools.ClassConfig) *classSettings { 40 return &classSettings{cfg: cfg, BaseClassSettings: *basesettings.NewBaseClassSettings(cfg)} 41 } 42 43 func (cs *classSettings) Model() string { 44 return cs.getProperty("model", DefaultJinaAIModel) 45 } 46 47 func (cs *classSettings) BaseURL() string { 48 return cs.getProperty("baseURL", DefaultBaseURL) 49 } 50 51 func (cs *classSettings) Validate(class *models.Class) error { 52 if cs.cfg == nil { 53 // we would receive a nil-config on cross-class requests, such as Explore{} 54 return errors.New("empty config") 55 } 56 57 if err := cs.BaseClassSettings.Validate(); err != nil { 58 return err 59 } 60 61 err := cs.validateIndexState(class, cs) 62 if err != nil { 63 return err 64 } 65 66 return nil 67 } 68 69 func (cs *classSettings) getProperty(name, defaultValue string) string { 70 return cs.BaseClassSettings.GetPropertyAsString(name, defaultValue) 71 } 72 73 func (cs *classSettings) validateIndexState(class *models.Class, settings ClassSettings) error { 74 if settings.VectorizeClassName() { 75 // if the user chooses to vectorize the classname, vector-building will 76 // always be possible, no need to investigate further 77 78 return nil 79 } 80 81 // search if there is at least one indexed, string/text prop. If found pass 82 // validation 83 for _, prop := range class.Properties { 84 if len(prop.DataType) < 1 { 85 return errors.Errorf("property %s must have at least one datatype: "+ 86 "got %v", prop.Name, prop.DataType) 87 } 88 89 if prop.DataType[0] != string(schema.DataTypeText) { 90 // we can only vectorize text-like props 91 continue 92 } 93 94 if settings.PropertyIndexed(prop.Name) { 95 // found at least one, this is a valid schema 96 return nil 97 } 98 } 99 100 return fmt.Errorf("invalid properties: didn't find a single property which is " + 101 "of type string or text and is not excluded from indexing. In addition the " + 102 "class name is excluded from vectorization as well, meaning that it cannot be " + 103 "used to determine the vector position. To fix this, set 'vectorizeClassName' " + 104 "to true if the class name is contextionary-valid. Alternatively add at least " + 105 "contextionary-valid text/string property which is not excluded from " + 106 "indexing.") 107 }