github.com/weaviate/weaviate@v1.24.6/modules/text2vec-openai/config.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package modopenai
    13  
    14  import (
    15  	"context"
    16  
    17  	"github.com/weaviate/weaviate/entities/models"
    18  	"github.com/weaviate/weaviate/entities/modulecapabilities"
    19  	"github.com/weaviate/weaviate/entities/moduletools"
    20  	"github.com/weaviate/weaviate/entities/schema"
    21  	"github.com/weaviate/weaviate/modules/text2vec-openai/vectorizer"
    22  )
    23  
    24  func (m *OpenAIModule) ClassConfigDefaults() map[string]interface{} {
    25  	return map[string]interface{}{
    26  		"vectorizeClassName": vectorizer.DefaultVectorizeClassName,
    27  		"baseURL":            vectorizer.DefaultBaseURL,
    28  		"model":              vectorizer.DefaultOpenAIModel,
    29  	}
    30  }
    31  
    32  func (m *OpenAIModule) PropertyConfigDefaults(
    33  	dt *schema.DataType,
    34  ) map[string]interface{} {
    35  	return map[string]interface{}{
    36  		"skip":                  !vectorizer.DefaultPropertyIndexed,
    37  		"vectorizePropertyName": vectorizer.DefaultVectorizePropertyName,
    38  	}
    39  }
    40  
    41  func (m *OpenAIModule) ValidateClass(ctx context.Context,
    42  	class *models.Class, cfg moduletools.ClassConfig,
    43  ) error {
    44  	settings := vectorizer.NewClassSettings(cfg)
    45  	return settings.Validate(class)
    46  }
    47  
    48  var _ = modulecapabilities.ClassConfigurator(New())
    49  
    50  // type ConfigValidator struct {
    51  // 	logger logrus.FieldLogger
    52  // }
    53  
    54  // type ClassSettings interface {
    55  // 	VectorizeClassName() bool
    56  // 	VectorizePropertyName(propName string) bool
    57  // 	PropertyIndexed(propName string) bool
    58  // }
    59  
    60  // func NewConfigValidator(logger logrus.FieldLogger) *ConfigValidator {
    61  // 	return &ConfigValidator{logger: logger}
    62  // }
    63  
    64  // func (cv *ConfigValidator) Do(ctx context.Context, class *models.Class,
    65  // 	cfg moduletools.ClassConfig, settings ClassSettings) error {
    66  // 	// In text2vec-openai (as opposed to e.g. text2vec-contextionary) the
    67  // 	// assumption is that the models will be able to deal with any words, even
    68  // 	// previously unseen ones. Therefore we do not need to validate individual
    69  // 	// properties, but only the overall "index state"
    70  
    71  // 	if err := cv.validateIndexState(ctx, class, settings); err != nil {
    72  // 		return errors.Errorf("invalid combination of properties")
    73  // 	}
    74  
    75  // 	cv.checkForPossibilityOfDuplicateVectors(ctx, class, settings)
    76  
    77  // 	return nil
    78  // }
    79  
    80  // func (cv *ConfigValidator) validateIndexState(ctx context.Context,
    81  // 	class *models.Class, settings ClassSettings) error {
    82  // 	if settings.VectorizeClassName() {
    83  // 		// if the user chooses to vectorize the classname, vector-building will
    84  // 		// always be possible, no need to investigate further
    85  
    86  // 		return nil
    87  // 	}
    88  
    89  // 	// search if there is at least one indexed, string/text prop. If found pass
    90  // 	// validation
    91  // 	for _, prop := range class.Properties {
    92  // 		if len(prop.DataType) < 1 {
    93  // 			return errors.Errorf("property %s must have at least one datatype: "+
    94  // 				"got %v", prop.Name, prop.DataType)
    95  // 		}
    96  
    97  // 		if prop.DataType[0] != string(schema.DataTypeText) {
    98  // 			// we can only vectorize text-like props
    99  // 			continue
   100  // 		}
   101  
   102  // 		if settings.PropertyIndexed(prop.Name) {
   103  // 			// found at least one, this is a valid schema
   104  // 			return nil
   105  // 		}
   106  // 	}
   107  
   108  // 	return fmt.Errorf("invalid properties: didn't find a single property which is " +
   109  // 		"of type string or text and is not excluded from indexing. In addition the " +
   110  // 		"class name is excluded from vectorization as well, meaning that it cannot be " +
   111  // 		"used to determine the vector position. To fix this, set 'vectorizeClassName' " +
   112  // 		"to true if the class name is contextionary-valid. Alternatively add at least " +
   113  // 		"contextionary-valid text/string property which is not excluded from " +
   114  // 		"indexing.")
   115  // }
   116  
   117  // func (cv *ConfigValidator) checkForPossibilityOfDuplicateVectors(
   118  // 	ctx context.Context, class *models.Class, settings ClassSettings) {
   119  // 	if !settings.VectorizeClassName() {
// 		// if the user chooses not to vectorize the class name, this means they must
   121  // 		// have chosen something else to vectorize, otherwise the validation would
   122  // 		// have error'd before we ever got here. We can skip further checking.
   123  
   124  // 		return
   125  // 	}
   126  
   127  // 	// search if there is at least one indexed, string/text prop. If found exit
   128  // 	for _, prop := range class.Properties {
   129  // 		// length check skipped, because validation has already passed
   130  // 		if prop.DataType[0] != string(schema.DataTypeText) {
   131  // 			// we can only vectorize text-like props
   132  // 			continue
   133  // 		}
   134  
   135  // 		if settings.PropertyIndexed(prop.Name) {
   136  // 			// found at least one
   137  // 			return
   138  // 		}
   139  // 	}
   140  
   141  // 	cv.logger.WithField("module", "text2vec-openai").
   142  // 		WithField("class", class.Class).
   143  // 		Warnf("text2vec-openai: Class %q does not have any properties "+
   144  // 			"indexed (or only non text-properties indexed) and the vector position is "+
   145  // 			"only determined by the class name. Each object will end up with the same "+
   146  // 			"vector which leads to a severe performance penalty on imports. Consider "+
   147  // 			"setting vectorIndexConfig.skip=true for this property", class.Class)
   148  // }