github.com/weaviate/weaviate@v1.24.6/usecases/classification/writer.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package classification
    13  
    14  import (
    15  	"sync"
    16  	"time"
    17  
    18  	"github.com/sirupsen/logrus"
    19  	enterrors "github.com/weaviate/weaviate/entities/errors"
    20  
    21  	"github.com/weaviate/weaviate/entities/errorcompounder"
    22  	"github.com/weaviate/weaviate/entities/search"
    23  	"github.com/weaviate/weaviate/usecases/objects"
    24  )
    25  
    26  type batchWriterResults struct {
    27  	successCount int64
    28  	errorCount   int64
    29  	err          error
    30  }
    31  
    32  func (w batchWriterResults) SuccessCount() int64 {
    33  	return w.successCount
    34  }
    35  
    36  func (w batchWriterResults) ErrorCount() int64 {
    37  	return w.errorCount
    38  }
    39  
    40  func (w batchWriterResults) Err() error {
    41  	return w.err
    42  }
    43  
    44  type batchWriter struct {
    45  	mutex           sync.RWMutex
    46  	vectorRepo      vectorRepo
    47  	batchItemsCount int
    48  	batchIndex      int
    49  	batchObjects    objects.BatchObjects
    50  	saveObjectItems chan objects.BatchObjects
    51  	errorCount      int64
    52  	ec              *errorcompounder.SafeErrorCompounder
    53  	cancel          chan struct{}
    54  	batchThreshold  int
    55  	logger          logrus.FieldLogger
    56  }
    57  
    58  func newBatchWriter(vectorRepo vectorRepo, logger logrus.FieldLogger) Writer {
    59  	return &batchWriter{
    60  		vectorRepo:      vectorRepo,
    61  		batchItemsCount: 0,
    62  		batchObjects:    objects.BatchObjects{},
    63  		saveObjectItems: make(chan objects.BatchObjects),
    64  		errorCount:      0,
    65  		ec:              &errorcompounder.SafeErrorCompounder{},
    66  		cancel:          make(chan struct{}),
    67  		batchThreshold:  100,
    68  		logger:          logger,
    69  	}
    70  }
    71  
    72  // Store puts an item to batch list
    73  func (r *batchWriter) Store(item search.Result) error {
    74  	r.mutex.Lock()
    75  	defer r.mutex.Unlock()
    76  	return r.storeObject(item)
    77  }
    78  
    79  // Start starts the batch save goroutine
    80  func (r *batchWriter) Start() {
    81  	enterrors.GoWrapper(func() { r.batchSave() }, r.logger)
    82  }
    83  
    84  // Stop stops the batch save goroutine and saves the last items
    85  func (r *batchWriter) Stop() WriterResults {
    86  	r.cancel <- struct{}{}
    87  	r.saveObjects(r.batchObjects)
    88  	return batchWriterResults{int64(r.batchItemsCount) - r.errorCount, r.errorCount, r.ec.ToError()}
    89  }
    90  
    91  func (r *batchWriter) storeObject(item search.Result) error {
    92  	batchObject := objects.BatchObject{
    93  		UUID:          item.ID,
    94  		Object:        item.Object(),
    95  		OriginalIndex: r.batchIndex,
    96  	}
    97  	r.batchItemsCount++
    98  	r.batchIndex++
    99  	r.batchObjects = append(r.batchObjects, batchObject)
   100  	if len(r.batchObjects) >= r.batchThreshold {
   101  		r.saveObjectItems <- r.batchObjects
   102  		r.batchObjects = objects.BatchObjects{}
   103  		r.batchIndex = 0
   104  	}
   105  	return nil
   106  }
   107  
   108  // This goroutine is created in order to make possible the batch save operation to be run in background
   109  // and not to block the Store(item) operation invocation which is being done by the worker threads
   110  func (r *batchWriter) batchSave() {
   111  	for {
   112  		select {
   113  		case <-r.cancel:
   114  			return
   115  		case items := <-r.saveObjectItems:
   116  			r.saveObjects(items)
   117  		}
   118  	}
   119  }
   120  
   121  func (r *batchWriter) saveObjects(items objects.BatchObjects) {
   122  	// we need to allow quite some time as this is now a batch, no longer just a
   123  	// single item and we don't have any control over what other load is
   124  	// currently going on, such as imports. TODO: should this be
   125  	// user-configurable?
   126  	ctx, cancel := contextWithTimeout(30 * time.Second)
   127  	defer cancel()
   128  
   129  	if len(items) > 0 {
   130  		saved, err := r.vectorRepo.BatchPutObjects(ctx, items, nil)
   131  		if err != nil {
   132  			r.ec.Add(err)
   133  		}
   134  		for i := range saved {
   135  			if saved[i].Err != nil {
   136  				r.ec.Add(saved[i].Err)
   137  				r.errorCount++
   138  			}
   139  		}
   140  	}
   141  }