github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/shard_write_merge.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package db 13 14 import ( 15 "context" 16 "fmt" 17 18 "github.com/google/uuid" 19 "github.com/pkg/errors" 20 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 21 "github.com/weaviate/weaviate/entities/models" 22 "github.com/weaviate/weaviate/entities/storagestate" 23 "github.com/weaviate/weaviate/entities/storobj" 24 "github.com/weaviate/weaviate/usecases/objects" 25 ) 26 27 func (s *Shard) MergeObject(ctx context.Context, merge objects.MergeDocument) error { 28 if s.isReadOnly() { 29 return storagestate.ErrStatusReadOnly 30 } 31 32 if s.hasTargetVectors() { 33 for targetVector, vector := range merge.Vectors { 34 // validation needs to happen before any changes are done. Otherwise, insertion is aborted somewhere in-between. 35 vectorIndex := s.VectorIndexForName(targetVector) 36 if vectorIndex == nil { 37 return errors.Errorf("Validate vector index for update of %v for target vector %s: vector index not found", merge.ID, targetVector) 38 } 39 err := vectorIndex.ValidateBeforeInsert(vector) 40 if err != nil { 41 return errors.Wrapf(err, "Validate vector index for update of %v for target vector %s", merge.ID, targetVector) 42 } 43 } 44 } else { 45 if merge.Vector != nil { 46 // validation needs to happen before any changes are done. Otherwise, insertion is aborted somewhere in-between. 47 err := s.vectorIndex.ValidateBeforeInsert(merge.Vector) 48 if err != nil { 49 return errors.Wrapf(err, "Validate vector index for update of %v", merge.ID) 50 } 51 } 52 } 53 54 idBytes, err := uuid.MustParse(merge.ID.String()).MarshalBinary() 55 if err != nil { 56 return err 57 } 58 59 return s.merge(ctx, idBytes, merge) 60 } 61 62 func (s *Shard) merge(ctx context.Context, idBytes []byte, doc objects.MergeDocument) error { 63 obj, status, err := s.mergeObjectInStorage(doc, idBytes) 64 if err != nil { 65 return err 66 } 67 68 // object was not changed, no further updates are required 69 // https://github.com/weaviate/weaviate/issues/3949 70 if status.skipUpsert { 71 return nil 72 } 73 74 if s.hasTargetVectors() { 75 for targetVector, vector := range obj.Vectors { 76 if err := s.updateVectorIndexForName(vector, status, targetVector); err != nil { 77 return errors.Wrapf(err, "update vector index for target vector %s", targetVector) 78 } 79 } 80 } else { 81 if err := s.updateVectorIndex(obj.Vector, status); err != nil { 82 return errors.Wrap(err, "update vector index") 83 } 84 } 85 86 if err := s.updatePropertySpecificIndices(obj, status); err != nil { 87 return errors.Wrap(err, "update property-specific indices") 88 } 89 90 if err := s.store.WriteWALs(); err != nil { 91 return errors.Wrap(err, "flush all buffered WALs") 92 } 93 94 return nil 95 } 96 97 func (s *Shard) mergeObjectInStorage(merge objects.MergeDocument, 98 idBytes []byte, 99 ) (*storobj.Object, objectInsertStatus, error) { 100 bucket := s.store.Bucket(helpers.ObjectsBucketLSM) 101 102 var prevObj, obj *storobj.Object 103 var status objectInsertStatus 104 105 // see comment in shard_write_put.go::putObjectLSM 106 lock := &s.docIdLock[s.uuidToIdLockPoolId(idBytes)] 107 108 // wrapped in function to handle lock/unlock 109 if err := func() error { 110 lock.Lock() 111 defer lock.Unlock() 112 113 var err error 114 prevObj, err = fetchObject(bucket, idBytes) 115 if err != nil { 116 return errors.Wrap(err, "get bucket") 117 } 118 119 obj, _, err = s.mergeObjectData(prevObj, merge) 120 if err != nil { 121 return errors.Wrap(err, "merge object data") 122 } 123 124 status, err = s.determineInsertStatus(prevObj, obj) 125 if err != nil { 126 return errors.Wrap(err, "check insert/update status") 127 } 128 129 obj.DocID = status.docID 130 if status.skipUpsert { 131 return nil 132 } 133 134 objBytes, err := obj.MarshalBinary() 135 if err != nil { 136 return errors.Wrapf(err, "marshal object %s to binary", obj.ID()) 137 } 138 139 if err := s.upsertObjectDataLSM(bucket, idBytes, objBytes, status.docID); err != nil { 140 return errors.Wrap(err, "upsert object data") 141 } 142 143 return nil 144 }(); err != nil { 145 return nil, objectInsertStatus{}, err 146 } else if status.skipUpsert { 147 return obj, status, nil 148 } 149 150 if err := s.updateInvertedIndexLSM(obj, status, prevObj); err != nil { 151 return nil, status, errors.Wrap(err, "update inverted indices") 152 } 153 154 return obj, status, nil 155 } 156 157 // mutableMergeObjectLSM is a special version of mergeObjectInTx where no doc 158 // id increases will be made, but instead the old doc ID will be re-used. This 159 // is only possible if the following two conditions are met: 160 // 161 // 1. We only add to the inverted index, but there is nothing which requires 162 // cleaning up. Example `name: "John"` is updated to `name: "John Doe"`, 163 // this is valid because we only add new entry for "Doe", but do not alter 164 // the existing entry for "John" 165 // An invalid update would be `name:"John"` is updated to `name:"Diane"`, 166 // this would require a cleanup for the existing link from "John" to this 167 // doc id, which is not possible. The only way to clean up is to increase 168 // the doc id and delete all entries for the old one 169 // 170 // 2. The vector position is not altered. Vector Indices cannot be mutated 171 // therefore a vector update would not be reflected 172 // 173 // The above makes this a perfect candidate for a batch reference update as 174 // this alters neither the vector position, nor does it remove anything from 175 // the inverted index 176 func (s *Shard) mutableMergeObjectLSM(merge objects.MergeDocument, 177 idBytes []byte, 178 ) (mutableMergeResult, error) { 179 bucket := s.store.Bucket(helpers.ObjectsBucketLSM) 180 out := mutableMergeResult{} 181 182 // see comment in shard_write_put.go::putObjectLSM 183 lock := &s.docIdLock[s.uuidToIdLockPoolId(idBytes)] 184 lock.Lock() 185 defer lock.Unlock() 186 187 prevObj, err := fetchObject(bucket, idBytes) 188 if err != nil { 189 return out, err 190 } 191 192 if prevObj == nil { 193 uid := uuid.UUID{} 194 uid.UnmarshalBinary(idBytes) 195 return out, fmt.Errorf("object with id %s not found", uid) 196 } 197 198 obj, notEmptyPrevObj, err := s.mergeObjectData(prevObj, merge) 199 if err != nil { 200 return out, errors.Wrap(err, "merge object data") 201 } 202 203 out.next = obj 204 out.previous = notEmptyPrevObj 205 206 status, err := s.determineMutableInsertStatus(prevObj, obj) 207 if err != nil { 208 return out, errors.Wrap(err, "check insert/update status") 209 } 210 out.status = status 211 212 obj.DocID = status.docID // is not changed 213 objBytes, err := obj.MarshalBinary() 214 if err != nil { 215 return out, errors.Wrapf(err, "marshal object %s to binary", obj.ID()) 216 } 217 218 if err := s.upsertObjectDataLSM(bucket, idBytes, objBytes, status.docID); err != nil { 219 return out, errors.Wrap(err, "upsert object data") 220 } 221 222 // do not updated inverted index, since this requires delta analysis, which 223 // must be done by the caller! 224 225 return out, nil 226 } 227 228 type mutableMergeResult struct { 229 next *storobj.Object 230 previous *storobj.Object 231 status objectInsertStatus 232 } 233 234 func (s *Shard) mergeObjectData(prevObj *storobj.Object, 235 merge objects.MergeDocument, 236 ) (*storobj.Object, *storobj.Object, error) { 237 if prevObj == nil { 238 // DocID must be overwritten after status check, simply set to initial 239 // value 240 prevObj = storobj.New(0) 241 prevObj.SetClass(merge.Class) 242 prevObj.SetID(merge.ID) 243 } 244 245 return mergeProps(prevObj, merge), prevObj, nil 246 } 247 248 func mergeProps(previous *storobj.Object, 249 merge objects.MergeDocument, 250 ) *storobj.Object { 251 next := previous.DeepCopyDangerous() 252 properties, ok := next.Properties().(map[string]interface{}) 253 if !ok || properties == nil { 254 properties = map[string]interface{}{} 255 } 256 257 // remove properties from object that have been set to nil 258 for _, propToDelete := range merge.PropertiesToDelete { 259 delete(properties, propToDelete) 260 } 261 262 for propName, value := range merge.PrimitiveSchema { 263 // for primitive props, we simply need to overwrite 264 properties[propName] = value 265 } 266 267 for _, ref := range merge.References { 268 propName := ref.From.Property.String() 269 prop := properties[propName] 270 propParsed, ok := prop.(models.MultipleRef) 271 if !ok { 272 propParsed = models.MultipleRef{} 273 } 274 propParsed = append(propParsed, ref.To.SingleRef()) 275 properties[propName] = propParsed 276 } 277 278 if merge.Vector == nil { 279 next.Vector = previous.Vector 280 } else { 281 next.Vector = merge.Vector 282 } 283 284 if len(merge.Vectors) == 0 { 285 next.Vectors = previous.Vectors 286 } else { 287 next.Vectors = vectorsAsMap(merge.Vectors) 288 } 289 290 next.Object.LastUpdateTimeUnix = merge.UpdateTime 291 next.SetProperties(properties) 292 293 return next 294 } 295 296 func vectorsAsMap(in models.Vectors) map[string][]float32 { 297 if len(in) > 0 { 298 out := make(map[string][]float32) 299 for targetVector, vector := range in { 300 out[targetVector] = vector 301 } 302 return out 303 } 304 return nil 305 }