github.com/weaviate/weaviate@v1.24.6/usecases/objects/batch_add.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package objects 13 14 import ( 15 "context" 16 "fmt" 17 "runtime" 18 "time" 19 20 enterrors "github.com/weaviate/weaviate/entities/errors" 21 22 "github.com/go-openapi/strfmt" 23 "github.com/google/uuid" 24 "github.com/weaviate/weaviate/entities/additional" 25 "github.com/weaviate/weaviate/entities/errorcompounder" 26 "github.com/weaviate/weaviate/entities/models" 27 "github.com/weaviate/weaviate/entities/moduletools" 28 "github.com/weaviate/weaviate/usecases/objects/validation" 29 ) 30 31 // AddObjects Class Instances in batch to the connected DB 32 func (b *BatchManager) AddObjects(ctx context.Context, principal *models.Principal, 33 objects []*models.Object, fields []*string, repl *additional.ReplicationProperties, 34 ) (BatchObjects, error) { 35 err := b.authorizer.Authorize(principal, "create", "batch/objects") 36 if err != nil { 37 return nil, err 38 } 39 40 unlock, err := b.locks.LockConnector() 41 if err != nil { 42 return nil, NewErrInternal("could not acquire lock: %v", err) 43 } 44 defer unlock() 45 46 before := time.Now() 47 b.metrics.BatchInc() 48 defer b.metrics.BatchOp("total_uc_level", before.UnixNano()) 49 defer b.metrics.BatchDec() 50 51 return b.addObjects(ctx, principal, objects, fields, repl) 52 } 53 54 func (b *BatchManager) addObjects(ctx context.Context, principal *models.Principal, 55 classes []*models.Object, fields []*string, repl *additional.ReplicationProperties, 56 ) (BatchObjects, error) { 57 beforePreProcessing := time.Now() 58 if err := b.validateObjectForm(classes); err != nil { 59 return nil, NewErrInvalidUserInput("invalid param 'objects': %v", err) 60 } 61 62 batchObjects := b.validateObjectsConcurrently(ctx, principal, classes, fields, repl) 63 b.metrics.BatchOp("total_preprocessing", beforePreProcessing.UnixNano()) 64 65 var ( 66 res BatchObjects 67 err error 68 ) 69 70 beforePersistence := time.Now() 71 defer b.metrics.BatchOp("total_persistence_level", beforePersistence.UnixNano()) 72 if res, err = b.vectorRepo.BatchPutObjects(ctx, batchObjects, repl); err != nil { 73 return nil, NewErrInternal("batch objects: %#v", err) 74 } 75 76 return res, nil 77 } 78 79 func (b *BatchManager) validateObjectForm(classes []*models.Object) error { 80 if len(classes) == 0 { 81 return fmt.Errorf("cannot be empty, need at least one object for batching") 82 } 83 84 return nil 85 } 86 87 func (b *BatchManager) validateObjectsConcurrently(ctx context.Context, principal *models.Principal, 88 objects []*models.Object, fields []*string, repl *additional.ReplicationProperties, 89 ) BatchObjects { 90 fieldsToKeep := determineResponseFields(fields) 91 c := make(chan BatchObject, len(objects)) 92 93 // the validation function can't error directly, it would return an error 94 // over the channel. But by using an error group, we can easily limit the 95 // concurrency 96 // 97 // see https://github.com/weaviate/weaviate/issues/3179 for details of how the 98 // unbounded concurrency caused a production outage 99 eg := enterrors.NewErrorGroupWrapper(b.logger) 100 eg.SetLimit(2 * runtime.GOMAXPROCS(0)) 101 102 // Generate a goroutine for each separate request 103 for i, object := range objects { 104 i := i 105 object := object 106 eg.Go(func() error { 107 b.validateObject(ctx, principal, object, i, &c, fieldsToKeep, repl) 108 return nil 109 }, object.ID) 110 } 111 112 eg.Wait() 113 close(c) 114 return objectsChanToSlice(c) 115 } 116 117 func (b *BatchManager) validateObject(ctx context.Context, principal *models.Principal, 118 concept *models.Object, originalIndex int, resultsC *chan BatchObject, 119 fieldsToKeep map[string]struct{}, repl *additional.ReplicationProperties, 120 ) { 121 var id strfmt.UUID 122 123 ec := &errorcompounder.ErrorCompounder{} 124 125 // Auto Schema 126 err := b.autoSchemaManager.autoSchema(ctx, principal, concept, true) 127 ec.Add(err) 128 129 if concept.ID == "" { 130 // Generate UUID for the new object 131 uid, err := generateUUID() 132 id = uid 133 ec.Add(err) 134 } else { 135 if _, err := uuid.Parse(concept.ID.String()); err != nil { 136 ec.Add(err) 137 } 138 id = concept.ID 139 } 140 141 object := &models.Object{} 142 object.LastUpdateTimeUnix = 0 143 object.ID = id 144 object.Vector = concept.Vector 145 object.Vectors = concept.Vectors 146 object.Tenant = concept.Tenant 147 148 if _, ok := fieldsToKeep["class"]; ok { 149 object.Class = concept.Class 150 } 151 if _, ok := fieldsToKeep["properties"]; ok { 152 object.Properties = concept.Properties 153 } 154 155 if object.Properties == nil { 156 object.Properties = map[string]interface{}{} 157 } 158 now := unixNow() 159 if _, ok := fieldsToKeep["creationTimeUnix"]; ok { 160 object.CreationTimeUnix = now 161 } 162 if _, ok := fieldsToKeep["lastUpdateTimeUnix"]; ok { 163 object.LastUpdateTimeUnix = now 164 } 165 class, err := b.schemaManager.GetClass(ctx, principal, object.Class) 166 ec.Add(err) 167 if class == nil { 168 ec.Add(fmt.Errorf("class '%s' not present in schema", object.Class)) 169 } else { 170 err = validation.New(b.vectorRepo.Exists, b.config, repl). 171 Object(ctx, class, object, nil) 172 ec.Add(err) 173 174 if err == nil { 175 compFactory := func() (moduletools.VectorizablePropsComparator, error) { 176 searchObj, err := b.vectorRepo.Object(ctx, object.Class, id, nil, additional.Properties{}, repl, object.Tenant) 177 if err != nil { 178 return nil, err 179 } 180 if searchObj != nil { 181 prevObj := searchObj.Object() 182 return moduletools.NewVectorizablePropsComparator(class.Properties, 183 object.Properties, prevObj.Properties, prevObj.Vector, prevObj.Vectors), nil 184 } 185 return moduletools.NewVectorizablePropsComparatorDummy(class.Properties, object.Properties), nil 186 } 187 188 // update vector only if we passed validation 189 err = b.modulesProvider.UpdateVector(ctx, object, class, compFactory, b.findObject, b.logger) 190 ec.Add(err) 191 } 192 } 193 194 *resultsC <- BatchObject{ 195 UUID: id, 196 Object: object, 197 Err: ec.ToError(), 198 OriginalIndex: originalIndex, 199 } 200 } 201 202 func objectsChanToSlice(c chan BatchObject) BatchObjects { 203 result := make([]BatchObject, len(c)) 204 for object := range c { 205 result[object.OriginalIndex] = object 206 } 207 208 return result 209 } 210 211 func unixNow() int64 { 212 return time.Now().UnixNano() / int64(time.Millisecond) 213 }