github.com/weaviate/weaviate@v1.24.6/usecases/objects/batch_add.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package objects
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"runtime"
    18  	"time"
    19  
    20  	enterrors "github.com/weaviate/weaviate/entities/errors"
    21  
    22  	"github.com/go-openapi/strfmt"
    23  	"github.com/google/uuid"
    24  	"github.com/weaviate/weaviate/entities/additional"
    25  	"github.com/weaviate/weaviate/entities/errorcompounder"
    26  	"github.com/weaviate/weaviate/entities/models"
    27  	"github.com/weaviate/weaviate/entities/moduletools"
    28  	"github.com/weaviate/weaviate/usecases/objects/validation"
    29  )
    30  
    31  // AddObjects Class Instances in batch to the connected DB
    32  func (b *BatchManager) AddObjects(ctx context.Context, principal *models.Principal,
    33  	objects []*models.Object, fields []*string, repl *additional.ReplicationProperties,
    34  ) (BatchObjects, error) {
    35  	err := b.authorizer.Authorize(principal, "create", "batch/objects")
    36  	if err != nil {
    37  		return nil, err
    38  	}
    39  
    40  	unlock, err := b.locks.LockConnector()
    41  	if err != nil {
    42  		return nil, NewErrInternal("could not acquire lock: %v", err)
    43  	}
    44  	defer unlock()
    45  
    46  	before := time.Now()
    47  	b.metrics.BatchInc()
    48  	defer b.metrics.BatchOp("total_uc_level", before.UnixNano())
    49  	defer b.metrics.BatchDec()
    50  
    51  	return b.addObjects(ctx, principal, objects, fields, repl)
    52  }
    53  
    54  func (b *BatchManager) addObjects(ctx context.Context, principal *models.Principal,
    55  	classes []*models.Object, fields []*string, repl *additional.ReplicationProperties,
    56  ) (BatchObjects, error) {
    57  	beforePreProcessing := time.Now()
    58  	if err := b.validateObjectForm(classes); err != nil {
    59  		return nil, NewErrInvalidUserInput("invalid param 'objects': %v", err)
    60  	}
    61  
    62  	batchObjects := b.validateObjectsConcurrently(ctx, principal, classes, fields, repl)
    63  	b.metrics.BatchOp("total_preprocessing", beforePreProcessing.UnixNano())
    64  
    65  	var (
    66  		res BatchObjects
    67  		err error
    68  	)
    69  
    70  	beforePersistence := time.Now()
    71  	defer b.metrics.BatchOp("total_persistence_level", beforePersistence.UnixNano())
    72  	if res, err = b.vectorRepo.BatchPutObjects(ctx, batchObjects, repl); err != nil {
    73  		return nil, NewErrInternal("batch objects: %#v", err)
    74  	}
    75  
    76  	return res, nil
    77  }
    78  
    79  func (b *BatchManager) validateObjectForm(classes []*models.Object) error {
    80  	if len(classes) == 0 {
    81  		return fmt.Errorf("cannot be empty, need at least one object for batching")
    82  	}
    83  
    84  	return nil
    85  }
    86  
    87  func (b *BatchManager) validateObjectsConcurrently(ctx context.Context, principal *models.Principal,
    88  	objects []*models.Object, fields []*string, repl *additional.ReplicationProperties,
    89  ) BatchObjects {
    90  	fieldsToKeep := determineResponseFields(fields)
    91  	c := make(chan BatchObject, len(objects))
    92  
    93  	// the validation function can't error directly, it would return an error
    94  	// over the channel. But by using an error group, we can easily limit the
    95  	// concurrency
    96  	//
    97  	// see https://github.com/weaviate/weaviate/issues/3179 for details of how the
    98  	// unbounded concurrency caused a production outage
    99  	eg := enterrors.NewErrorGroupWrapper(b.logger)
   100  	eg.SetLimit(2 * runtime.GOMAXPROCS(0))
   101  
   102  	// Generate a goroutine for each separate request
   103  	for i, object := range objects {
   104  		i := i
   105  		object := object
   106  		eg.Go(func() error {
   107  			b.validateObject(ctx, principal, object, i, &c, fieldsToKeep, repl)
   108  			return nil
   109  		}, object.ID)
   110  	}
   111  
   112  	eg.Wait()
   113  	close(c)
   114  	return objectsChanToSlice(c)
   115  }
   116  
   117  func (b *BatchManager) validateObject(ctx context.Context, principal *models.Principal,
   118  	concept *models.Object, originalIndex int, resultsC *chan BatchObject,
   119  	fieldsToKeep map[string]struct{}, repl *additional.ReplicationProperties,
   120  ) {
   121  	var id strfmt.UUID
   122  
   123  	ec := &errorcompounder.ErrorCompounder{}
   124  
   125  	// Auto Schema
   126  	err := b.autoSchemaManager.autoSchema(ctx, principal, concept, true)
   127  	ec.Add(err)
   128  
   129  	if concept.ID == "" {
   130  		// Generate UUID for the new object
   131  		uid, err := generateUUID()
   132  		id = uid
   133  		ec.Add(err)
   134  	} else {
   135  		if _, err := uuid.Parse(concept.ID.String()); err != nil {
   136  			ec.Add(err)
   137  		}
   138  		id = concept.ID
   139  	}
   140  
   141  	object := &models.Object{}
   142  	object.LastUpdateTimeUnix = 0
   143  	object.ID = id
   144  	object.Vector = concept.Vector
   145  	object.Vectors = concept.Vectors
   146  	object.Tenant = concept.Tenant
   147  
   148  	if _, ok := fieldsToKeep["class"]; ok {
   149  		object.Class = concept.Class
   150  	}
   151  	if _, ok := fieldsToKeep["properties"]; ok {
   152  		object.Properties = concept.Properties
   153  	}
   154  
   155  	if object.Properties == nil {
   156  		object.Properties = map[string]interface{}{}
   157  	}
   158  	now := unixNow()
   159  	if _, ok := fieldsToKeep["creationTimeUnix"]; ok {
   160  		object.CreationTimeUnix = now
   161  	}
   162  	if _, ok := fieldsToKeep["lastUpdateTimeUnix"]; ok {
   163  		object.LastUpdateTimeUnix = now
   164  	}
   165  	class, err := b.schemaManager.GetClass(ctx, principal, object.Class)
   166  	ec.Add(err)
   167  	if class == nil {
   168  		ec.Add(fmt.Errorf("class '%s' not present in schema", object.Class))
   169  	} else {
   170  		err = validation.New(b.vectorRepo.Exists, b.config, repl).
   171  			Object(ctx, class, object, nil)
   172  		ec.Add(err)
   173  
   174  		if err == nil {
   175  			compFactory := func() (moduletools.VectorizablePropsComparator, error) {
   176  				searchObj, err := b.vectorRepo.Object(ctx, object.Class, id, nil, additional.Properties{}, repl, object.Tenant)
   177  				if err != nil {
   178  					return nil, err
   179  				}
   180  				if searchObj != nil {
   181  					prevObj := searchObj.Object()
   182  					return moduletools.NewVectorizablePropsComparator(class.Properties,
   183  						object.Properties, prevObj.Properties, prevObj.Vector, prevObj.Vectors), nil
   184  				}
   185  				return moduletools.NewVectorizablePropsComparatorDummy(class.Properties, object.Properties), nil
   186  			}
   187  
   188  			// update vector only if we passed validation
   189  			err = b.modulesProvider.UpdateVector(ctx, object, class, compFactory, b.findObject, b.logger)
   190  			ec.Add(err)
   191  		}
   192  	}
   193  
   194  	*resultsC <- BatchObject{
   195  		UUID:          id,
   196  		Object:        object,
   197  		Err:           ec.ToError(),
   198  		OriginalIndex: originalIndex,
   199  	}
   200  }
   201  
   202  func objectsChanToSlice(c chan BatchObject) BatchObjects {
   203  	result := make([]BatchObject, len(c))
   204  	for object := range c {
   205  		result[object.OriginalIndex] = object
   206  	}
   207  
   208  	return result
   209  }
   210  
   211  func unixNow() int64 {
   212  	return time.Now().UnixNano() / int64(time.Millisecond)
   213  }