github.com/weaviate/weaviate@v1.24.6/usecases/objects/merge.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package objects
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  
    18  	"github.com/weaviate/weaviate/usecases/config"
    19  
    20  	"github.com/go-openapi/strfmt"
    21  	"github.com/weaviate/weaviate/entities/additional"
    22  	"github.com/weaviate/weaviate/entities/models"
    23  	"github.com/weaviate/weaviate/entities/moduletools"
    24  	"github.com/weaviate/weaviate/entities/schema"
    25  	"github.com/weaviate/weaviate/entities/schema/crossref"
    26  )
    27  
    28  type MergeDocument struct {
    29  	Class                string                      `json:"class"`
    30  	ID                   strfmt.UUID                 `json:"id"`
    31  	PrimitiveSchema      map[string]interface{}      `json:"primitiveSchema"`
    32  	References           BatchReferences             `json:"references"`
    33  	Vector               []float32                   `json:"vector"`
    34  	Vectors              models.Vectors              `json:"vectors"`
    35  	UpdateTime           int64                       `json:"updateTime"`
    36  	AdditionalProperties models.AdditionalProperties `json:"additionalProperties"`
    37  	PropertiesToDelete   []string                    `json:"propertiesToDelete"`
    38  }
    39  
    40  func (m *Manager) MergeObject(ctx context.Context, principal *models.Principal,
    41  	updates *models.Object, repl *additional.ReplicationProperties,
    42  ) *Error {
    43  	if err := m.validateInputs(updates); err != nil {
    44  		return &Error{"bad request", StatusBadRequest, err}
    45  	}
    46  	cls, id := updates.Class, updates.ID
    47  	path := fmt.Sprintf("objects/%s/%s", cls, id)
    48  	if err := m.authorizer.Authorize(principal, "update", path); err != nil {
    49  		return &Error{path, StatusForbidden, err}
    50  	}
    51  
    52  	m.metrics.MergeObjectInc()
    53  	defer m.metrics.MergeObjectDec()
    54  
    55  	obj, err := m.vectorRepo.Object(ctx, cls, id, nil, additional.Properties{}, repl, updates.Tenant)
    56  	if err != nil {
    57  		switch err.(type) {
    58  		case ErrMultiTenancy:
    59  			return &Error{"repo.object", StatusUnprocessableEntity, err}
    60  		default:
    61  			return &Error{"repo.object", StatusInternalServerError, err}
    62  		}
    63  	}
    64  	if obj == nil {
    65  		return &Error{"not found", StatusNotFound, err}
    66  	}
    67  
    68  	err = m.autoSchemaManager.autoSchema(ctx, principal, updates, false)
    69  	if err != nil {
    70  		return &Error{"bad request", StatusBadRequest, NewErrInvalidUserInput("invalid object: %v", err)}
    71  	}
    72  
    73  	var propertiesToDelete []string
    74  	if updates.Properties != nil {
    75  		for key, val := range updates.Properties.(map[string]interface{}) {
    76  			if val == nil {
    77  				propertiesToDelete = append(propertiesToDelete, schema.LowercaseFirstLetter(key))
    78  			}
    79  		}
    80  	}
    81  
    82  	prevObj := obj.Object()
    83  	if err := m.validateObjectAndNormalizeNames(
    84  		ctx, principal, repl, updates, prevObj); err != nil {
    85  		return &Error{"bad request", StatusBadRequest, err}
    86  	}
    87  
    88  	if updates.Properties == nil {
    89  		updates.Properties = map[string]interface{}{}
    90  	}
    91  
    92  	return m.patchObject(ctx, principal, prevObj, updates, repl, propertiesToDelete, updates.Tenant)
    93  }
    94  
    95  // patchObject patches an existing object obj with updates
    96  func (m *Manager) patchObject(ctx context.Context, principal *models.Principal,
    97  	prevObj, updates *models.Object, repl *additional.ReplicationProperties,
    98  	propertiesToDelete []string, tenant string,
    99  ) *Error {
   100  	cls, id := updates.Class, updates.ID
   101  	primitive, refs := m.splitPrimitiveAndRefs(updates.Properties.(map[string]interface{}), cls, id)
   102  	objWithVec, err := m.mergeObjectSchemaAndVectorize(ctx, cls, prevObj.Properties,
   103  		primitive, principal, prevObj.Vector, updates.Vector, prevObj.Vectors, updates.Vectors)
   104  	if err != nil {
   105  		return &Error{"merge and vectorize", StatusInternalServerError, err}
   106  	}
   107  	mergeDoc := MergeDocument{
   108  		Class:              cls,
   109  		ID:                 id,
   110  		PrimitiveSchema:    primitive,
   111  		References:         refs,
   112  		Vector:             objWithVec.Vector,
   113  		Vectors:            objWithVec.Vectors,
   114  		UpdateTime:         m.timeSource.Now(),
   115  		PropertiesToDelete: propertiesToDelete,
   116  	}
   117  
   118  	if objWithVec.Additional != nil {
   119  		mergeDoc.AdditionalProperties = objWithVec.Additional
   120  	}
   121  
   122  	if err := m.vectorRepo.Merge(ctx, mergeDoc, repl, tenant); err != nil {
   123  		return &Error{"repo.merge", StatusInternalServerError, err}
   124  	}
   125  
   126  	return nil
   127  }
   128  
   129  func (m *Manager) validateInputs(updates *models.Object) error {
   130  	if updates == nil {
   131  		return fmt.Errorf("empty updates")
   132  	}
   133  	if updates.Class == "" {
   134  		return fmt.Errorf("empty class")
   135  	}
   136  	if updates.ID == "" {
   137  		return fmt.Errorf("empty uuid")
   138  	}
   139  	return nil
   140  }
   141  
   142  func (m *Manager) mergeObjectSchemaAndVectorize(ctx context.Context, className string,
   143  	prevPropsSch models.PropertySchema, nextProps map[string]interface{},
   144  	principal *models.Principal, prevVec, nextVec []float32,
   145  	prevVecs models.Vectors, nextVecs models.Vectors,
   146  ) (*models.Object, error) {
   147  	class, err := m.schemaManager.GetClass(ctx, principal, className)
   148  	if err != nil {
   149  		return nil, err
   150  	}
   151  
   152  	var mergedProps map[string]interface{}
   153  	var compFactory moduletools.PropsComparatorFactory
   154  
   155  	vector := nextVec
   156  	vectors := nextVecs
   157  	if prevPropsSch == nil {
   158  		mergedProps = nextProps
   159  
   160  		compFactory = func() (moduletools.VectorizablePropsComparator, error) {
   161  			return moduletools.NewVectorizablePropsComparatorDummy(class.Properties, mergedProps), nil
   162  		}
   163  	} else {
   164  		prevProps, ok := prevPropsSch.(map[string]interface{})
   165  		if !ok {
   166  			return nil, fmt.Errorf("expected previous schema to be map, but got %#v", prevPropsSch)
   167  		}
   168  
   169  		mergedProps = map[string]interface{}{}
   170  		for propName, propValue := range prevProps {
   171  			mergedProps[propName] = propValue
   172  		}
   173  		for propName, propValue := range nextProps {
   174  			mergedProps[propName] = propValue
   175  		}
   176  
   177  		compFactory = func() (moduletools.VectorizablePropsComparator, error) {
   178  			return moduletools.NewVectorizablePropsComparator(class.Properties, mergedProps, prevProps, prevVec, prevVecs), nil
   179  		}
   180  	}
   181  
   182  	// Note: vector could be a nil vector in case a vectorizer is configured,
   183  	// then the vectorizer will set it
   184  	obj := &models.Object{Class: className, Properties: mergedProps, Vector: vector, Vectors: vectors}
   185  	if err := m.modulesProvider.UpdateVector(ctx, obj, class, compFactory, m.findObject, m.logger); err != nil {
   186  		return nil, err
   187  	}
   188  
   189  	// If there is no vectorization module and no updated vector, use the previous vector(s)
   190  	if obj.Vector == nil && class.Vectorizer == config.VectorizerModuleNone {
   191  		obj.Vector = prevVec
   192  	}
   193  
   194  	if obj.Vectors == nil {
   195  		obj.Vectors = models.Vectors{}
   196  	}
   197  
   198  	// check for each named vector if the previous vector should be used. This should only happen if
   199  	// - the vectorizer is none
   200  	// - the vector is not set in the update
   201  	// - the vector was set in the previous object
   202  	for name, vectorConfig := range class.VectorConfig {
   203  		if _, ok := vectorConfig.Vectorizer.(map[string]interface{})[config.VectorizerModuleNone]; !ok {
   204  			continue
   205  		}
   206  
   207  		prevTargetVector, ok := prevVecs[name]
   208  		if !ok {
   209  			continue
   210  		}
   211  
   212  		if _, ok := obj.Vectors[name]; !ok {
   213  			obj.Vectors[name] = prevTargetVector
   214  		}
   215  	}
   216  
   217  	return obj, nil
   218  }
   219  
   220  func (m *Manager) splitPrimitiveAndRefs(in map[string]interface{}, sourceClass string,
   221  	sourceID strfmt.UUID,
   222  ) (map[string]interface{}, BatchReferences) {
   223  	primitive := map[string]interface{}{}
   224  	var outRefs BatchReferences
   225  
   226  	for prop, value := range in {
   227  		refs, ok := value.(models.MultipleRef)
   228  
   229  		if !ok {
   230  			// this must be a primitive filed
   231  			primitive[prop] = value
   232  			continue
   233  		}
   234  
   235  		for _, ref := range refs {
   236  			target, _ := crossref.Parse(ref.Beacon.String())
   237  			// safe to ignore error as validation has already been passed
   238  
   239  			source := &crossref.RefSource{
   240  				Local:    true,
   241  				PeerName: "localhost",
   242  				Property: schema.PropertyName(prop),
   243  				Class:    schema.ClassName(sourceClass),
   244  				TargetID: sourceID,
   245  			}
   246  
   247  			outRefs = append(outRefs, BatchReference{From: source, To: target})
   248  		}
   249  	}
   250  
   251  	return primitive, outRefs
   252  }