github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/shard_write_batch_delete.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package db
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"sync"
    18  	"time"
    19  
    20  	enterrors "github.com/weaviate/weaviate/entities/errors"
    21  
    22  	"github.com/go-openapi/strfmt"
    23  	"github.com/pkg/errors"
    24  	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
    25  	"github.com/weaviate/weaviate/entities/additional"
    26  	"github.com/weaviate/weaviate/entities/filters"
    27  	"github.com/weaviate/weaviate/entities/storagestate"
    28  	"github.com/weaviate/weaviate/usecases/objects"
    29  )
    30  
    31  // return value map[int]error gives the error for the index as it received it
    32  func (s *Shard) DeleteObjectBatch(ctx context.Context, uuids []strfmt.UUID, dryRun bool) objects.BatchSimpleObjects {
    33  	if s.isReadOnly() {
    34  		return objects.BatchSimpleObjects{
    35  			objects.BatchSimpleObject{Err: storagestate.ErrStatusReadOnly},
    36  		}
    37  	}
    38  	return newDeleteObjectsBatcher(s).Delete(ctx, uuids, dryRun)
    39  }
    40  
// deleteObjectsBatcher executes a batch delete against a single shard.
//
// shard is the shard the deletes and the subsequent flushes run on. objects
// holds the per-object results of the most recent delete; flush errors are
// written into it afterwards via setErrorAtIndex.
type deleteObjectsBatcher struct {
	// The embedded mutex is taken by setErrorAtIndex while it writes to objects.
	sync.Mutex
	shard   ShardLike
	objects objects.BatchSimpleObjects
}
    46  
    47  func newDeleteObjectsBatcher(shard ShardLike) *deleteObjectsBatcher {
    48  	return &deleteObjectsBatcher{shard: shard}
    49  }
    50  
    51  func (b *deleteObjectsBatcher) Delete(ctx context.Context, uuids []strfmt.UUID, dryRun bool) objects.BatchSimpleObjects {
    52  	b.delete(ctx, uuids, dryRun)
    53  	b.flushWALs(ctx)
    54  	return b.objects
    55  }
    56  
    57  func (b *deleteObjectsBatcher) delete(ctx context.Context, uuids []strfmt.UUID, dryRun bool) {
    58  	b.objects = b.deleteSingleBatchInLSM(ctx, uuids, dryRun)
    59  }
    60  
    61  func (b *deleteObjectsBatcher) deleteSingleBatchInLSM(ctx context.Context, batch []strfmt.UUID, dryRun bool) objects.BatchSimpleObjects {
    62  	before := time.Now()
    63  	defer b.shard.Metrics().BatchDelete(before, "shard_delete_all")
    64  
    65  	result := make(objects.BatchSimpleObjects, len(batch))
    66  	objLock := &sync.Mutex{}
    67  
    68  	// if the context is expired fail all
    69  	if err := ctx.Err(); err != nil {
    70  		for i := range result {
    71  			result[i] = objects.BatchSimpleObject{Err: errors.Wrap(err, "begin batch")}
    72  		}
    73  		return result
    74  	}
    75  
    76  	wg := &sync.WaitGroup{}
    77  	for j, docID := range batch {
    78  		index := j
    79  		docID := docID
    80  		wg.Add(1)
    81  		f := func() {
    82  			defer wg.Done()
    83  			// perform delete
    84  			obj := b.deleteObjectOfBatchInLSM(ctx, docID, dryRun)
    85  			objLock.Lock()
    86  			result[index] = obj
    87  			objLock.Unlock()
    88  		}
    89  		enterrors.GoWrapper(f, b.shard.Index().logger)
    90  	}
    91  	wg.Wait()
    92  
    93  	return result
    94  }
    95  
    96  func (b *deleteObjectsBatcher) deleteObjectOfBatchInLSM(ctx context.Context, uuid strfmt.UUID, dryRun bool) objects.BatchSimpleObject {
    97  	before := time.Now()
    98  	defer b.shard.Metrics().BatchDelete(before, "shard_delete_individual_total")
    99  	if !dryRun {
   100  		err := b.shard.batchDeleteObject(ctx, uuid)
   101  		return objects.BatchSimpleObject{UUID: uuid, Err: err}
   102  	}
   103  
   104  	return objects.BatchSimpleObject{UUID: uuid, Err: nil}
   105  }
   106  
   107  func (b *deleteObjectsBatcher) flushWALs(ctx context.Context) {
   108  	before := time.Now()
   109  	defer b.shard.Metrics().BatchDelete(before, "shard_flush_wals")
   110  
   111  	if err := b.shard.Store().WriteWALs(); err != nil {
   112  		for i := range b.objects {
   113  			b.setErrorAtIndex(err, i)
   114  		}
   115  	}
   116  
   117  	if b.shard.hasTargetVectors() {
   118  		for targetVector, vectorIndex := range b.shard.VectorIndexes() {
   119  			if err := vectorIndex.Flush(); err != nil {
   120  				for i := range b.objects {
   121  					b.setErrorAtIndex(fmt.Errorf("target vector %s: %w", targetVector, err), i)
   122  				}
   123  			}
   124  		}
   125  	} else {
   126  		if err := b.shard.VectorIndex().Flush(); err != nil {
   127  			for i := range b.objects {
   128  				b.setErrorAtIndex(err, i)
   129  			}
   130  		}
   131  	}
   132  
   133  	if err := b.shard.GetPropertyLengthTracker().Flush(false); err != nil {
   134  		for i := range b.objects {
   135  			b.setErrorAtIndex(err, i)
   136  		}
   137  	}
   138  }
   139  
   140  func (b *deleteObjectsBatcher) setErrorAtIndex(err error, index int) {
   141  	b.Lock()
   142  	defer b.Unlock()
   143  	b.objects[index].Err = err
   144  }
   145  
   146  func (s *Shard) findDocIDs(ctx context.Context, filters *filters.LocalFilter) ([]uint64, error) {
   147  	allowList, err := inverted.NewSearcher(s.index.logger, s.store, s.index.getSchema.GetSchemaSkipAuth(),
   148  		nil, s.index.classSearcher, s.index.stopwords, s.versioner.version, s.isFallbackToSearchable,
   149  		s.tenant(), s.index.Config.QueryNestedRefLimit, s.bitmapFactory).
   150  		DocIDs(ctx, filters, additional.Properties{}, s.index.Config.ClassName)
   151  	if err != nil {
   152  		return nil, err
   153  	}
   154  	return allowList.Slice(), nil
   155  }
   156  
   157  func (s *Shard) FindUUIDs(ctx context.Context, filters *filters.LocalFilter) ([]strfmt.UUID, error) {
   158  	docs, err := s.findDocIDs(ctx, filters)
   159  	if err != nil {
   160  		return nil, err
   161  	}
   162  
   163  	var (
   164  		uuids   = make([]strfmt.UUID, len(docs))
   165  		currIdx = 0
   166  	)
   167  
   168  	for _, doc := range docs {
   169  		uuid, err := s.uuidFromDocID(doc)
   170  		if err != nil {
   171  			// TODO: More than likely this will occur due to an object which has already been deleted.
   172  			//       However, this is not a guarantee. This can be improved by logging, or handling
   173  			//       errors other than `id not found` rather than skipping them entirely.
   174  			continue
   175  		}
   176  		uuids[currIdx] = uuid
   177  		currIdx++
   178  	}
   179  	return uuids[:currIdx], nil
   180  }