github.com/weaviate/weaviate@v1.24.6/usecases/classification/writer.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package classification 13 14 import ( 15 "sync" 16 "time" 17 18 "github.com/sirupsen/logrus" 19 enterrors "github.com/weaviate/weaviate/entities/errors" 20 21 "github.com/weaviate/weaviate/entities/errorcompounder" 22 "github.com/weaviate/weaviate/entities/search" 23 "github.com/weaviate/weaviate/usecases/objects" 24 ) 25 26 type batchWriterResults struct { 27 successCount int64 28 errorCount int64 29 err error 30 } 31 32 func (w batchWriterResults) SuccessCount() int64 { 33 return w.successCount 34 } 35 36 func (w batchWriterResults) ErrorCount() int64 { 37 return w.errorCount 38 } 39 40 func (w batchWriterResults) Err() error { 41 return w.err 42 } 43 44 type batchWriter struct { 45 mutex sync.RWMutex 46 vectorRepo vectorRepo 47 batchItemsCount int 48 batchIndex int 49 batchObjects objects.BatchObjects 50 saveObjectItems chan objects.BatchObjects 51 errorCount int64 52 ec *errorcompounder.SafeErrorCompounder 53 cancel chan struct{} 54 batchThreshold int 55 logger logrus.FieldLogger 56 } 57 58 func newBatchWriter(vectorRepo vectorRepo, logger logrus.FieldLogger) Writer { 59 return &batchWriter{ 60 vectorRepo: vectorRepo, 61 batchItemsCount: 0, 62 batchObjects: objects.BatchObjects{}, 63 saveObjectItems: make(chan objects.BatchObjects), 64 errorCount: 0, 65 ec: &errorcompounder.SafeErrorCompounder{}, 66 cancel: make(chan struct{}), 67 batchThreshold: 100, 68 logger: logger, 69 } 70 } 71 72 // Store puts an item to batch list 73 func (r *batchWriter) Store(item search.Result) error { 74 r.mutex.Lock() 75 defer r.mutex.Unlock() 76 return r.storeObject(item) 77 } 78 79 // Start starts the batch save goroutine 80 func (r *batchWriter) Start() { 81 enterrors.GoWrapper(func() { r.batchSave() }, r.logger) 82 } 83 84 // Stop stops the batch save goroutine and saves the last items 85 func (r *batchWriter) Stop() WriterResults { 86 r.cancel <- struct{}{} 87 r.saveObjects(r.batchObjects) 88 return batchWriterResults{int64(r.batchItemsCount) - r.errorCount, r.errorCount, r.ec.ToError()} 89 } 90 91 func (r *batchWriter) storeObject(item search.Result) error { 92 batchObject := objects.BatchObject{ 93 UUID: item.ID, 94 Object: item.Object(), 95 OriginalIndex: r.batchIndex, 96 } 97 r.batchItemsCount++ 98 r.batchIndex++ 99 r.batchObjects = append(r.batchObjects, batchObject) 100 if len(r.batchObjects) >= r.batchThreshold { 101 r.saveObjectItems <- r.batchObjects 102 r.batchObjects = objects.BatchObjects{} 103 r.batchIndex = 0 104 } 105 return nil 106 } 107 108 // This goroutine is created in order to make possible the batch save operation to be run in background 109 // and not to block the Store(item) operation invocation which is being done by the worker threads 110 func (r *batchWriter) batchSave() { 111 for { 112 select { 113 case <-r.cancel: 114 return 115 case items := <-r.saveObjectItems: 116 r.saveObjects(items) 117 } 118 } 119 } 120 121 func (r *batchWriter) saveObjects(items objects.BatchObjects) { 122 // we need to allow quite some time as this is now a batch, no longer just a 123 // single item and we don't have any control over what other load is 124 // currently going on, such as imports. TODO: should this be 125 // user-configurable? 126 ctx, cancel := contextWithTimeout(30 * time.Second) 127 defer cancel() 128 129 if len(items) > 0 { 130 saved, err := r.vectorRepo.BatchPutObjects(ctx, items, nil) 131 if err != nil { 132 r.ec.Add(err) 133 } 134 for i := range saved { 135 if saved[i].Err != nil { 136 r.ec.Add(saved[i].Err) 137 r.errorCount++ 138 } 139 } 140 } 141 }