github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/repo.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package db 13 14 import ( 15 "context" 16 "math" 17 "runtime" 18 "runtime/debug" 19 "sync" 20 "sync/atomic" 21 "time" 22 23 enterrors "github.com/weaviate/weaviate/entities/errors" 24 25 "github.com/pkg/errors" 26 "github.com/sirupsen/logrus" 27 "github.com/weaviate/weaviate/adapters/repos/db/indexcheckpoint" 28 "github.com/weaviate/weaviate/entities/replication" 29 "github.com/weaviate/weaviate/entities/schema" 30 "github.com/weaviate/weaviate/entities/storobj" 31 "github.com/weaviate/weaviate/usecases/config" 32 "github.com/weaviate/weaviate/usecases/memwatch" 33 "github.com/weaviate/weaviate/usecases/monitoring" 34 "github.com/weaviate/weaviate/usecases/replica" 35 schemaUC "github.com/weaviate/weaviate/usecases/schema" 36 "github.com/weaviate/weaviate/usecases/sharding" 37 ) 38 39 type DB struct { 40 logger logrus.FieldLogger 41 schemaGetter schemaUC.SchemaGetter 42 config Config 43 indices map[string]*Index 44 remoteIndex sharding.RemoteIndexClient 45 replicaClient replica.Client 46 nodeResolver nodeResolver 47 remoteNode *sharding.RemoteNode 48 promMetrics *monitoring.PrometheusMetrics 49 indexCheckpoints *indexcheckpoint.Checkpoints 50 shutdown chan struct{} 51 startupComplete atomic.Bool 52 resourceScanState *resourceScanState 53 memMonitor *memwatch.Monitor 54 55 // indexLock is an RWMutex which allows concurrent access to various indexes, 56 // but only one modification at a time. R/W can be a bit confusing here, 57 // because it does not refer to write or read requests from a user's 58 // perspective, but rather: 59 // 60 // - Read -> The array containing all indexes is read-only. 
In other words 61 // there will never be a race condition from doing something like index := 62 // indexes[0]. What you do with the Index after retrieving it from the array 63 // does not matter. Assuming that it is thread-safe (it is) you can 64 // read/write from the index itself. Therefore from a user's perspective 65 // something like a parallel import batch and a read-query can happen without 66 // any problems. 67 // 68 // - Write -> The index array is being modified, for example, because a new 69 // index is added. This is mutually exclusive with the other case (but 70 // hopefully very short). 71 // 72 // 73 // See also: https://github.com/weaviate/weaviate/issues/2351 74 // 75 // This lock should be used to avoid that the indices-map is changed while iterating over it. To 76 // mark a given index in use, lock that index directly. 77 indexLock sync.RWMutex 78 79 jobQueueCh chan job 80 asyncIndexRetryInterval time.Duration 81 shutDownWg sync.WaitGroup 82 maxNumberGoroutines int 83 batchMonitorLock sync.Mutex 84 ratePerSecond int 85 86 // in the case of metrics grouping we need to observe some metrics 87 // node-centric, rather than shard-centric 88 metricsObserver *nodeWideMetricsObserver 89 } 90 91 func (db *DB) GetSchemaGetter() schemaUC.SchemaGetter { 92 return db.schemaGetter 93 } 94 95 func (db *DB) GetSchema() schema.Schema { 96 return db.schemaGetter.GetSchemaSkipAuth() 97 } 98 99 func (db *DB) GetConfig() Config { 100 return db.config 101 } 102 103 func (db *DB) GetIndices() []*Index { 104 out := make([]*Index, 0, len(db.indices)) 105 for _, index := range db.indices { 106 out = append(out, index) 107 } 108 109 return out 110 } 111 112 func (db *DB) GetRemoteIndex() sharding.RemoteIndexClient { 113 return db.remoteIndex 114 } 115 116 func (db *DB) SetSchemaGetter(sg schemaUC.SchemaGetter) { 117 db.schemaGetter = sg 118 } 119 120 func (db *DB) WaitForStartup(ctx context.Context) error { 121 err := db.init(ctx) 122 if err != nil { 123 return err 124 } 
125 126 db.startupComplete.Store(true) 127 db.scanResourceUsage() 128 129 return nil 130 } 131 132 func (db *DB) StartupComplete() bool { return db.startupComplete.Load() } 133 134 func New(logger logrus.FieldLogger, config Config, 135 remoteIndex sharding.RemoteIndexClient, nodeResolver nodeResolver, 136 remoteNodesClient sharding.RemoteNodeClient, replicaClient replica.Client, 137 promMetrics *monitoring.PrometheusMetrics, 138 ) (*DB, error) { 139 db := &DB{ 140 logger: logger, 141 config: config, 142 indices: map[string]*Index{}, 143 remoteIndex: remoteIndex, 144 nodeResolver: nodeResolver, 145 remoteNode: sharding.NewRemoteNode(nodeResolver, remoteNodesClient), 146 replicaClient: replicaClient, 147 promMetrics: promMetrics, 148 shutdown: make(chan struct{}), 149 asyncIndexRetryInterval: 5 * time.Second, 150 maxNumberGoroutines: int(math.Round(config.MaxImportGoroutinesFactor * float64(runtime.GOMAXPROCS(0)))), 151 resourceScanState: newResourceScanState(), 152 memMonitor: memwatch.NewMonitor(memwatch.LiveHeapReader, debug.SetMemoryLimit, 0.97), 153 } 154 155 // make sure memMonitor has an initial state 156 db.memMonitor.Refresh() 157 158 if db.maxNumberGoroutines == 0 { 159 return db, errors.New("no workers to add batch-jobs configured.") 160 } 161 if !asyncEnabled() { 162 db.jobQueueCh = make(chan job, 100000) 163 db.shutDownWg.Add(db.maxNumberGoroutines) 164 for i := 0; i < db.maxNumberGoroutines; i++ { 165 i := i 166 enterrors.GoWrapper(func() { db.worker(i == 0) }, db.logger) 167 } 168 } else { 169 logger.Info("async indexing enabled") 170 w := runtime.GOMAXPROCS(0) - 1 171 db.shutDownWg.Add(w) 172 db.jobQueueCh = make(chan job, w) 173 for i := 0; i < w; i++ { 174 f := func() { 175 defer db.shutDownWg.Done() 176 asyncWorker(db.jobQueueCh, db.logger, db.asyncIndexRetryInterval) 177 } 178 enterrors.GoWrapper(f, db.logger) 179 180 } 181 } 182 183 return db, nil 184 } 185 186 type Config struct { 187 RootPath string 188 QueryLimit int64 189 QueryMaximumResults 
int64 190 QueryNestedRefLimit int64 191 ResourceUsage config.ResourceUsage 192 MaxImportGoroutinesFactor float64 193 MemtablesFlushDirtyAfter int 194 MemtablesInitialSizeMB int 195 MemtablesMaxSizeMB int 196 MemtablesMinActiveSeconds int 197 MemtablesMaxActiveSeconds int 198 TrackVectorDimensions bool 199 ServerVersion string 200 GitHash string 201 AvoidMMap bool 202 DisableLazyLoadShards bool 203 Replication replication.GlobalConfig 204 } 205 206 // GetIndex returns the index if it exists or nil if it doesn't 207 func (db *DB) GetIndex(className schema.ClassName) *Index { 208 db.indexLock.RLock() 209 defer db.indexLock.RUnlock() 210 211 id := indexID(className) 212 index, ok := db.indices[id] 213 if !ok { 214 return nil 215 } 216 217 return index 218 } 219 220 // IndexExists returns if an index exists 221 func (db *DB) IndexExists(className schema.ClassName) bool { 222 db.indexLock.RLock() 223 defer db.indexLock.RUnlock() 224 225 id := indexID(className) 226 _, ok := db.indices[id] 227 return ok 228 } 229 230 // GetIndexForIncoming returns the index if it exists or nil if it doesn't 231 func (db *DB) GetIndexForIncoming(className schema.ClassName) sharding.RemoteIndexIncomingRepo { 232 db.indexLock.RLock() 233 defer db.indexLock.RUnlock() 234 235 id := indexID(className) 236 index, ok := db.indices[id] 237 if !ok { 238 return nil 239 } 240 241 return index 242 } 243 244 // DeleteIndex deletes the index 245 func (db *DB) DeleteIndex(className schema.ClassName) error { 246 db.indexLock.Lock() 247 defer db.indexLock.Unlock() 248 249 // Get index 250 id := indexID(className) 251 index := db.indices[id] 252 if index == nil { 253 return nil 254 } 255 256 // Drop index 257 index.dropIndex.Lock() 258 defer index.dropIndex.Unlock() 259 if err := index.drop(); err != nil { 260 db.logger.WithField("action", "delete_index").WithField("class", className).Error(err) 261 } 262 delete(db.indices, id) 263 264 db.promMetrics.DeleteClass(className.String()) 265 return nil 266 } 267 
// Shutdown gracefully stops the DB: it signals the shutdown channel, stops
// the batch workers, shuts down every index, waits for the worker pool to
// drain, and finally closes the index checkpoints (async mode only).
// The ordering here is deliberate; do not reorder.
func (db *DB) Shutdown(ctx context.Context) error {
	// NOTE(review): blocking send — presumably a background goroutine started
	// elsewhere receives on db.shutdown; verify there is always a receiver,
	// otherwise this blocks forever.
	db.shutdown <- struct{}{}

	if !asyncEnabled() {
		// shut down the workers that add objects to
		// the additional storage; one poison-pill job (index: -1) per worker.
		for i := 0; i < db.maxNumberGoroutines; i++ {
			db.jobQueueCh <- job{
				index: -1,
			}
		}
	}

	if db.metricsObserver != nil {
		db.metricsObserver.Shutdown()
	}

	// Hold the write lock so the indices map cannot change while we shut
	// each index down. First shutdown error aborts the loop and is returned.
	db.indexLock.Lock()
	defer db.indexLock.Unlock()
	for id, index := range db.indices {
		if err := index.Shutdown(ctx); err != nil {
			return errors.Wrapf(err, "shutdown index %q", id)
		}
	}

	if asyncEnabled() {
		// shut down the async workers; closing the channel ends their
		// range loops (see asyncWorker).
		close(db.jobQueueCh)
	}

	db.shutDownWg.Wait() // wait until job queue shutdown is completed

	if asyncEnabled() {
		db.indexCheckpoints.Close()
	}

	return nil
}

// worker is the synchronous batch worker loop. It consumes jobs from
// db.jobQueueCh until it receives a poison pill (index < 0). When first is
// true, this worker additionally publishes an approximate objects-per-second
// rate (its own count extrapolated by the number of workers).
func (db *DB) worker(first bool) {
	objectCounter := 0
	checkTime := time.Now().Add(time.Second)
	for jobToAdd := range db.jobQueueCh {
		if jobToAdd.index < 0 {
			// poison pill from Shutdown: mark this worker done and exit.
			db.shutDownWg.Done()
			return
		}
		jobToAdd.batcher.storeSingleObjectInAdditionalStorage(jobToAdd.ctx, jobToAdd.object, jobToAdd.status, jobToAdd.index)
		jobToAdd.batcher.wg.Done()
		objectCounter += 1
		if first && time.Now().After(checkTime) { // only have one worker report the rate per second
			db.batchMonitorLock.Lock()
			db.ratePerSecond = objectCounter * db.maxNumberGoroutines
			db.batchMonitorLock.Unlock()

			objectCounter = 0
			checkTime = time.Now().Add(time.Second)
		}
	}
}

// job is the unit of work sent to the worker pool. The first group of fields
// is used by the synchronous worker; the second group only by asyncWorker.
type job struct {
	object  *storobj.Object
	status  objectInsertStatus
	index   int // < 0 is the poison pill that stops a synchronous worker
	ctx     context.Context
	batcher *objectsBatcher

	// async only
	chunk   *chunk
	indexer batchIndexer
	queue   *vectorQueue
}

// asyncWorker consumes vector-indexing jobs from ch until the channel is
// closed. For each chunk it separates deleted ids from live ones, indexes
// the live vectors with retry-on-error (aborting on context cancellation),
// persists a checkpoint only on full success, then releases the chunk.
// The ids/vectors/deleted slices are reused across jobs to avoid
// re-allocating per batch.
func asyncWorker(ch chan job, logger logrus.FieldLogger, retryInterval time.Duration) {
	var ids []uint64
	var vectors [][]float32
	var deleted []uint64

	for job := range ch {
		c := job.chunk
		// c.cursor marks how much of the chunk's data array is populated.
		for i := range c.data[:c.cursor] {
			if job.queue.IsDeleted(c.data[i].id) {
				deleted = append(deleted, c.data[i].id)
			} else {
				ids = append(ids, c.data[i].id)
				vectors = append(vectors, c.data[i].vector)
			}
		}

		var err error

		if len(ids) > 0 {
		LOOP:
			// retry AddBatch until it succeeds, the context is cancelled,
			// or the job context is done while waiting for the retry timer.
			for {
				err = job.indexer.AddBatch(job.ctx, ids, vectors)
				if err == nil {
					break LOOP
				}

				if errors.Is(err, context.Canceled) {
					logger.WithError(err).Debugf("skipping indexing batch due to context cancellation")
					break LOOP
				}

				logger.WithError(err).Infof("failed to index vectors, retrying in %s", retryInterval.String())

				t := time.NewTimer(retryInterval)
				select {
				case <-job.ctx.Done():
					// drain the timer
					if !t.Stop() {
						<-t.C
					}
					// NOTE(review): returning here skips releaseChunk for the
					// current chunk — presumably acceptable on shutdown; confirm.
					return
				case <-t.C:
				}
			}
		}

		// only persist checkpoint if we indexed a full batch
		if err == nil {
			job.queue.persistCheckpoint(ids)
		}

		job.queue.releaseChunk(c)

		if len(deleted) > 0 {
			job.queue.ResetDeleted(deleted...)
		}

		// reset the scratch slices, keeping their capacity for the next job
		ids = ids[:0]
		vectors = vectors[:0]
		deleted = deleted[:0]
	}
}