github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/shard_write_batch_delete.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package db 13 14 import ( 15 "context" 16 "fmt" 17 "sync" 18 "time" 19 20 enterrors "github.com/weaviate/weaviate/entities/errors" 21 22 "github.com/go-openapi/strfmt" 23 "github.com/pkg/errors" 24 "github.com/weaviate/weaviate/adapters/repos/db/inverted" 25 "github.com/weaviate/weaviate/entities/additional" 26 "github.com/weaviate/weaviate/entities/filters" 27 "github.com/weaviate/weaviate/entities/storagestate" 28 "github.com/weaviate/weaviate/usecases/objects" 29 ) 30 31 // return value map[int]error gives the error for the index as it received it 32 func (s *Shard) DeleteObjectBatch(ctx context.Context, uuids []strfmt.UUID, dryRun bool) objects.BatchSimpleObjects { 33 if s.isReadOnly() { 34 return objects.BatchSimpleObjects{ 35 objects.BatchSimpleObject{Err: storagestate.ErrStatusReadOnly}, 36 } 37 } 38 return newDeleteObjectsBatcher(s).Delete(ctx, uuids, dryRun) 39 } 40 41 type deleteObjectsBatcher struct { 42 sync.Mutex 43 shard ShardLike 44 objects objects.BatchSimpleObjects 45 } 46 47 func newDeleteObjectsBatcher(shard ShardLike) *deleteObjectsBatcher { 48 return &deleteObjectsBatcher{shard: shard} 49 } 50 51 func (b *deleteObjectsBatcher) Delete(ctx context.Context, uuids []strfmt.UUID, dryRun bool) objects.BatchSimpleObjects { 52 b.delete(ctx, uuids, dryRun) 53 b.flushWALs(ctx) 54 return b.objects 55 } 56 57 func (b *deleteObjectsBatcher) delete(ctx context.Context, uuids []strfmt.UUID, dryRun bool) { 58 b.objects = b.deleteSingleBatchInLSM(ctx, uuids, dryRun) 59 } 60 61 func (b *deleteObjectsBatcher) deleteSingleBatchInLSM(ctx context.Context, batch []strfmt.UUID, dryRun bool) objects.BatchSimpleObjects { 62 before := time.Now() 63 defer b.shard.Metrics().BatchDelete(before, "shard_delete_all") 64 65 result := make(objects.BatchSimpleObjects, len(batch)) 66 objLock := &sync.Mutex{} 67 68 // if the context is expired fail all 69 if err := ctx.Err(); err != nil { 70 for i := range result { 71 result[i] = objects.BatchSimpleObject{Err: errors.Wrap(err, "begin batch")} 72 } 73 return result 74 } 75 76 wg := &sync.WaitGroup{} 77 for j, docID := range batch { 78 index := j 79 docID := docID 80 wg.Add(1) 81 f := func() { 82 defer wg.Done() 83 // perform delete 84 obj := b.deleteObjectOfBatchInLSM(ctx, docID, dryRun) 85 objLock.Lock() 86 result[index] = obj 87 objLock.Unlock() 88 } 89 enterrors.GoWrapper(f, b.shard.Index().logger) 90 } 91 wg.Wait() 92 93 return result 94 } 95 96 func (b *deleteObjectsBatcher) deleteObjectOfBatchInLSM(ctx context.Context, uuid strfmt.UUID, dryRun bool) objects.BatchSimpleObject { 97 before := time.Now() 98 defer b.shard.Metrics().BatchDelete(before, "shard_delete_individual_total") 99 if !dryRun { 100 err := b.shard.batchDeleteObject(ctx, uuid) 101 return objects.BatchSimpleObject{UUID: uuid, Err: err} 102 } 103 104 return objects.BatchSimpleObject{UUID: uuid, Err: nil} 105 } 106 107 func (b *deleteObjectsBatcher) flushWALs(ctx context.Context) { 108 before := time.Now() 109 defer b.shard.Metrics().BatchDelete(before, "shard_flush_wals") 110 111 if err := b.shard.Store().WriteWALs(); err != nil { 112 for i := range b.objects { 113 b.setErrorAtIndex(err, i) 114 } 115 } 116 117 if b.shard.hasTargetVectors() { 118 for targetVector, vectorIndex := range b.shard.VectorIndexes() { 119 if err := vectorIndex.Flush(); err != nil { 120 for i := range b.objects { 121 b.setErrorAtIndex(fmt.Errorf("target vector %s: %w", targetVector, err), i) 122 } 123 } 124 } 125 } else { 126 if err := b.shard.VectorIndex().Flush(); err != nil { 127 for i := range b.objects { 128 b.setErrorAtIndex(err, i) 129 } 130 } 131 } 132 133 if err := b.shard.GetPropertyLengthTracker().Flush(false); err != nil { 134 for i := range b.objects { 135 b.setErrorAtIndex(err, i) 136 } 137 } 138 } 139 140 func (b *deleteObjectsBatcher) setErrorAtIndex(err error, index int) { 141 b.Lock() 142 defer b.Unlock() 143 b.objects[index].Err = err 144 } 145 146 func (s *Shard) findDocIDs(ctx context.Context, filters *filters.LocalFilter) ([]uint64, error) { 147 allowList, err := inverted.NewSearcher(s.index.logger, s.store, s.index.getSchema.GetSchemaSkipAuth(), 148 nil, s.index.classSearcher, s.index.stopwords, s.versioner.version, s.isFallbackToSearchable, 149 s.tenant(), s.index.Config.QueryNestedRefLimit, s.bitmapFactory). 150 DocIDs(ctx, filters, additional.Properties{}, s.index.Config.ClassName) 151 if err != nil { 152 return nil, err 153 } 154 return allowList.Slice(), nil 155 } 156 157 func (s *Shard) FindUUIDs(ctx context.Context, filters *filters.LocalFilter) ([]strfmt.UUID, error) { 158 docs, err := s.findDocIDs(ctx, filters) 159 if err != nil { 160 return nil, err 161 } 162 163 var ( 164 uuids = make([]strfmt.UUID, len(docs)) 165 currIdx = 0 166 ) 167 168 for _, doc := range docs { 169 uuid, err := s.uuidFromDocID(doc) 170 if err != nil { 171 // TODO: More than likely this will occur due to an object which has already been deleted. 172 // However, this is not a guarantee. This can be improved by logging, or handling 173 // errors other than `id not found` rather than skipping them entirely. 174 continue 175 } 176 uuids[currIdx] = uuid 177 currIdx++ 178 } 179 return uuids[:currIdx], nil 180 }