github.com/weaviate/weaviate@v1.24.6/modules/text2vec-jinaai/vectorizer/class_settings.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package vectorizer
    13  
    14  import (
    15  	"fmt"
    16  
    17  	"github.com/pkg/errors"
    18  
    19  	"github.com/weaviate/weaviate/entities/models"
    20  	"github.com/weaviate/weaviate/entities/moduletools"
    21  	"github.com/weaviate/weaviate/entities/schema"
    22  	basesettings "github.com/weaviate/weaviate/usecases/modulecomponents/settings"
    23  )
    24  
    25  const (
    26  	DefaultJinaAIDocumentType    = "text"
    27  	DefaultJinaAIModel           = "jina-embeddings-v2-base-en"
    28  	DefaultVectorizeClassName    = true
    29  	DefaultPropertyIndexed       = true
    30  	DefaultVectorizePropertyName = false
    31  	DefaultBaseURL               = "https://api.jina.ai"
    32  )
    33  
    34  type classSettings struct {
    35  	basesettings.BaseClassSettings
    36  	cfg moduletools.ClassConfig
    37  }
    38  
    39  func NewClassSettings(cfg moduletools.ClassConfig) *classSettings {
    40  	return &classSettings{cfg: cfg, BaseClassSettings: *basesettings.NewBaseClassSettings(cfg)}
    41  }
    42  
    43  func (cs *classSettings) Model() string {
    44  	return cs.getProperty("model", DefaultJinaAIModel)
    45  }
    46  
    47  func (cs *classSettings) BaseURL() string {
    48  	return cs.getProperty("baseURL", DefaultBaseURL)
    49  }
    50  
    51  func (cs *classSettings) Validate(class *models.Class) error {
    52  	if cs.cfg == nil {
    53  		// we would receive a nil-config on cross-class requests, such as Explore{}
    54  		return errors.New("empty config")
    55  	}
    56  
    57  	if err := cs.BaseClassSettings.Validate(); err != nil {
    58  		return err
    59  	}
    60  
    61  	err := cs.validateIndexState(class, cs)
    62  	if err != nil {
    63  		return err
    64  	}
    65  
    66  	return nil
    67  }
    68  
    69  func (cs *classSettings) getProperty(name, defaultValue string) string {
    70  	return cs.BaseClassSettings.GetPropertyAsString(name, defaultValue)
    71  }
    72  
    73  func (cs *classSettings) validateIndexState(class *models.Class, settings ClassSettings) error {
    74  	if settings.VectorizeClassName() {
    75  		// if the user chooses to vectorize the classname, vector-building will
    76  		// always be possible, no need to investigate further
    77  
    78  		return nil
    79  	}
    80  
    81  	// search if there is at least one indexed, string/text prop. If found pass
    82  	// validation
    83  	for _, prop := range class.Properties {
    84  		if len(prop.DataType) < 1 {
    85  			return errors.Errorf("property %s must have at least one datatype: "+
    86  				"got %v", prop.Name, prop.DataType)
    87  		}
    88  
    89  		if prop.DataType[0] != string(schema.DataTypeText) {
    90  			// we can only vectorize text-like props
    91  			continue
    92  		}
    93  
    94  		if settings.PropertyIndexed(prop.Name) {
    95  			// found at least one, this is a valid schema
    96  			return nil
    97  		}
    98  	}
    99  
   100  	return fmt.Errorf("invalid properties: didn't find a single property which is " +
   101  		"of type string or text and is not excluded from indexing. In addition the " +
   102  		"class name is excluded from vectorization as well, meaning that it cannot be " +
   103  		"used to determine the vector position. To fix this, set 'vectorizeClassName' " +
   104  		"to true if the class name is contextionary-valid. Alternatively add at least " +
   105  		"contextionary-valid text/string property which is not excluded from " +
   106  		"indexing.")
   107  }