github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/repo.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package db
    13  
    14  import (
    15  	"context"
    16  	"math"
    17  	"runtime"
    18  	"runtime/debug"
    19  	"sync"
    20  	"sync/atomic"
    21  	"time"
    22  
    23  	enterrors "github.com/weaviate/weaviate/entities/errors"
    24  
    25  	"github.com/pkg/errors"
    26  	"github.com/sirupsen/logrus"
    27  	"github.com/weaviate/weaviate/adapters/repos/db/indexcheckpoint"
    28  	"github.com/weaviate/weaviate/entities/replication"
    29  	"github.com/weaviate/weaviate/entities/schema"
    30  	"github.com/weaviate/weaviate/entities/storobj"
    31  	"github.com/weaviate/weaviate/usecases/config"
    32  	"github.com/weaviate/weaviate/usecases/memwatch"
    33  	"github.com/weaviate/weaviate/usecases/monitoring"
    34  	"github.com/weaviate/weaviate/usecases/replica"
    35  	schemaUC "github.com/weaviate/weaviate/usecases/schema"
    36  	"github.com/weaviate/weaviate/usecases/sharding"
    37  )
    38  
// DB is the top-level database object. It owns one *Index per class and
// fans user requests out to them.
type DB struct {
	logger       logrus.FieldLogger
	schemaGetter schemaUC.SchemaGetter
	config       Config
	// indices maps indexID(className) -> *Index; guarded by indexLock (see below)
	indices       map[string]*Index
	remoteIndex   sharding.RemoteIndexClient
	replicaClient replica.Client
	nodeResolver  nodeResolver
	remoteNode    *sharding.RemoteNode
	// promMetrics may be nil when monitoring is disabled — TODO confirm with callers of New
	promMetrics      *monitoring.PrometheusMetrics
	indexCheckpoints *indexcheckpoint.Checkpoints
	// shutdown receives one signal from Shutdown()
	shutdown chan struct{}
	// startupComplete is set by WaitForStartup after init succeeds
	startupComplete   atomic.Bool
	resourceScanState *resourceScanState
	memMonitor        *memwatch.Monitor

	// indexLock is an RWMutex which allows concurrent access to various indexes,
	// but only one modification at a time. R/W can be a bit confusing here,
	// because it does not refer to write or read requests from a user's
	// perspective, but rather:
	//
	// - Read -> The array containing all indexes is read-only. In other words
	// there will never be a race condition from doing something like index :=
	// indexes[0]. What you do with the Index after retrieving it from the array
	// does not matter. Assuming that it is thread-safe (it is) you can
	// read/write from the index itself. Therefore from a user's perspective
	// something like a parallel import batch and a read-query can happen without
	// any problems.
	//
	// - Write -> The index array is being modified, for example, because a new
	// index is added. This is mutually exclusive with the other case (but
	// hopefully very short).
	//
	//
	// See also: https://github.com/weaviate/weaviate/issues/2351
	//
	// This lock should be used to avoid that the indices-map is changed while iterating over it. To
	// mark a given index in use, lock that index directly.
	indexLock sync.RWMutex

	// jobQueueCh feeds the (a)sync indexing workers started in New
	jobQueueCh              chan job
	asyncIndexRetryInterval time.Duration
	// shutDownWg tracks running workers so Shutdown can wait for them
	shutDownWg sync.WaitGroup
	// maxNumberGoroutines is the size of the sync batch-worker pool
	maxNumberGoroutines int
	// batchMonitorLock guards ratePerSecond, written by the reporting worker
	batchMonitorLock sync.Mutex
	ratePerSecond    int

	// in the case of metrics grouping we need to observe some metrics
	// node-centric, rather than shard-centric
	metricsObserver *nodeWideMetricsObserver
}
    90  
// GetSchemaGetter exposes the schema getter the DB was configured with.
func (db *DB) GetSchemaGetter() schemaUC.SchemaGetter {
	return db.schemaGetter
}
    94  
// GetSchema returns the current schema, bypassing authorization checks.
func (db *DB) GetSchema() schema.Schema {
	return db.schemaGetter.GetSchemaSkipAuth()
}
    98  
// GetConfig returns the static configuration the DB was created with.
func (db *DB) GetConfig() Config {
	return db.config
}
   102  
   103  func (db *DB) GetIndices() []*Index {
   104  	out := make([]*Index, 0, len(db.indices))
   105  	for _, index := range db.indices {
   106  		out = append(out, index)
   107  	}
   108  
   109  	return out
   110  }
   111  
// GetRemoteIndex exposes the client used to reach index shards on other nodes.
func (db *DB) GetRemoteIndex() sharding.RemoteIndexClient {
	return db.remoteIndex
}
   115  
// SetSchemaGetter replaces the schema getter. NOTE(review): no locking here —
// presumably only called during wiring before the DB serves requests; confirm
// with callers.
func (db *DB) SetSchemaGetter(sg schemaUC.SchemaGetter) {
	db.schemaGetter = sg
}
   119  
   120  func (db *DB) WaitForStartup(ctx context.Context) error {
   121  	err := db.init(ctx)
   122  	if err != nil {
   123  		return err
   124  	}
   125  
   126  	db.startupComplete.Store(true)
   127  	db.scanResourceUsage()
   128  
   129  	return nil
   130  }
   131  
   132  func (db *DB) StartupComplete() bool { return db.startupComplete.Load() }
   133  
   134  func New(logger logrus.FieldLogger, config Config,
   135  	remoteIndex sharding.RemoteIndexClient, nodeResolver nodeResolver,
   136  	remoteNodesClient sharding.RemoteNodeClient, replicaClient replica.Client,
   137  	promMetrics *monitoring.PrometheusMetrics,
   138  ) (*DB, error) {
   139  	db := &DB{
   140  		logger:                  logger,
   141  		config:                  config,
   142  		indices:                 map[string]*Index{},
   143  		remoteIndex:             remoteIndex,
   144  		nodeResolver:            nodeResolver,
   145  		remoteNode:              sharding.NewRemoteNode(nodeResolver, remoteNodesClient),
   146  		replicaClient:           replicaClient,
   147  		promMetrics:             promMetrics,
   148  		shutdown:                make(chan struct{}),
   149  		asyncIndexRetryInterval: 5 * time.Second,
   150  		maxNumberGoroutines:     int(math.Round(config.MaxImportGoroutinesFactor * float64(runtime.GOMAXPROCS(0)))),
   151  		resourceScanState:       newResourceScanState(),
   152  		memMonitor:              memwatch.NewMonitor(memwatch.LiveHeapReader, debug.SetMemoryLimit, 0.97),
   153  	}
   154  
   155  	// make sure memMonitor has an initial state
   156  	db.memMonitor.Refresh()
   157  
   158  	if db.maxNumberGoroutines == 0 {
   159  		return db, errors.New("no workers to add batch-jobs configured.")
   160  	}
   161  	if !asyncEnabled() {
   162  		db.jobQueueCh = make(chan job, 100000)
   163  		db.shutDownWg.Add(db.maxNumberGoroutines)
   164  		for i := 0; i < db.maxNumberGoroutines; i++ {
   165  			i := i
   166  			enterrors.GoWrapper(func() { db.worker(i == 0) }, db.logger)
   167  		}
   168  	} else {
   169  		logger.Info("async indexing enabled")
   170  		w := runtime.GOMAXPROCS(0) - 1
   171  		db.shutDownWg.Add(w)
   172  		db.jobQueueCh = make(chan job, w)
   173  		for i := 0; i < w; i++ {
   174  			f := func() {
   175  				defer db.shutDownWg.Done()
   176  				asyncWorker(db.jobQueueCh, db.logger, db.asyncIndexRetryInterval)
   177  			}
   178  			enterrors.GoWrapper(f, db.logger)
   179  
   180  		}
   181  	}
   182  
   183  	return db, nil
   184  }
   185  
// Config holds the static configuration the DB is started with.
type Config struct {
	RootPath                  string // filesystem root under which all index data lives
	QueryLimit                int64
	QueryMaximumResults       int64
	QueryNestedRefLimit       int64
	ResourceUsage             config.ResourceUsage
	MaxImportGoroutinesFactor float64 // multiplied with GOMAXPROCS to size the batch worker pool (see New)
	MemtablesFlushDirtyAfter  int
	MemtablesInitialSizeMB    int
	MemtablesMaxSizeMB        int
	MemtablesMinActiveSeconds int
	MemtablesMaxActiveSeconds int
	TrackVectorDimensions     bool
	ServerVersion             string
	GitHash                   string
	AvoidMMap                 bool
	DisableLazyLoadShards     bool
	Replication               replication.GlobalConfig
}
   205  
   206  // GetIndex returns the index if it exists or nil if it doesn't
   207  func (db *DB) GetIndex(className schema.ClassName) *Index {
   208  	db.indexLock.RLock()
   209  	defer db.indexLock.RUnlock()
   210  
   211  	id := indexID(className)
   212  	index, ok := db.indices[id]
   213  	if !ok {
   214  		return nil
   215  	}
   216  
   217  	return index
   218  }
   219  
   220  // IndexExists returns if an index exists
   221  func (db *DB) IndexExists(className schema.ClassName) bool {
   222  	db.indexLock.RLock()
   223  	defer db.indexLock.RUnlock()
   224  
   225  	id := indexID(className)
   226  	_, ok := db.indices[id]
   227  	return ok
   228  }
   229  
   230  // GetIndexForIncoming returns the index if it exists or nil if it doesn't
   231  func (db *DB) GetIndexForIncoming(className schema.ClassName) sharding.RemoteIndexIncomingRepo {
   232  	db.indexLock.RLock()
   233  	defer db.indexLock.RUnlock()
   234  
   235  	id := indexID(className)
   236  	index, ok := db.indices[id]
   237  	if !ok {
   238  		return nil
   239  	}
   240  
   241  	return index
   242  }
   243  
   244  // DeleteIndex deletes the index
   245  func (db *DB) DeleteIndex(className schema.ClassName) error {
   246  	db.indexLock.Lock()
   247  	defer db.indexLock.Unlock()
   248  
   249  	// Get index
   250  	id := indexID(className)
   251  	index := db.indices[id]
   252  	if index == nil {
   253  		return nil
   254  	}
   255  
   256  	// Drop index
   257  	index.dropIndex.Lock()
   258  	defer index.dropIndex.Unlock()
   259  	if err := index.drop(); err != nil {
   260  		db.logger.WithField("action", "delete_index").WithField("class", className).Error(err)
   261  	}
   262  	delete(db.indices, id)
   263  
   264  	db.promMetrics.DeleteClass(className.String())
   265  	return nil
   266  }
   267  
// Shutdown stops all background workers and shuts every index down.
//
// Ordering is deliberate: first the background loop listening on
// db.shutdown is signalled, then the batch workers are drained (sync
// mode) or their queue closed (async mode), then every index is shut
// down, and finally we wait for the worker WaitGroup before closing the
// async index checkpoints.
func (db *DB) Shutdown(ctx context.Context) error {
	db.shutdown <- struct{}{}

	if !asyncEnabled() {
		// stop the sync batch workers by sending one sentinel job
		// (index: -1) per worker; worker() exits and calls shutDownWg.Done
		// when it sees the sentinel
		for i := 0; i < db.maxNumberGoroutines; i++ {
			db.jobQueueCh <- job{
				index: -1,
			}
		}
	}

	if db.metricsObserver != nil {
		db.metricsObserver.Shutdown()
	}

	// hold the write lock so no other goroutine mutates the indices map
	// while we iterate and shut each index down
	db.indexLock.Lock()
	defer db.indexLock.Unlock()
	for id, index := range db.indices {
		if err := index.Shutdown(ctx); err != nil {
			return errors.Wrapf(err, "shutdown index %q", id)
		}
	}

	if asyncEnabled() {
		// shut down the async workers; they exit when the job channel
		// is drained and closed
		close(db.jobQueueCh)
	}

	db.shutDownWg.Wait() // wait until job queue shutdown is completed

	if asyncEnabled() {
		db.indexCheckpoints.Close()
	}

	return nil
}
   305  
   306  func (db *DB) worker(first bool) {
   307  	objectCounter := 0
   308  	checkTime := time.Now().Add(time.Second)
   309  	for jobToAdd := range db.jobQueueCh {
   310  		if jobToAdd.index < 0 {
   311  			db.shutDownWg.Done()
   312  			return
   313  		}
   314  		jobToAdd.batcher.storeSingleObjectInAdditionalStorage(jobToAdd.ctx, jobToAdd.object, jobToAdd.status, jobToAdd.index)
   315  		jobToAdd.batcher.wg.Done()
   316  		objectCounter += 1
   317  		if first && time.Now().After(checkTime) { // only have one worker report the rate per second
   318  			db.batchMonitorLock.Lock()
   319  			db.ratePerSecond = objectCounter * db.maxNumberGoroutines
   320  			db.batchMonitorLock.Unlock()
   321  
   322  			objectCounter = 0
   323  			checkTime = time.Now().Add(time.Second)
   324  		}
   325  	}
   326  }
   327  
// job is a single unit of work for the import workers. The sync batch
// workers use the first group of fields; the async indexing workers use
// the fields below the marker.
type job struct {
	object  *storobj.Object    // object to persist
	status  objectInsertStatus // insert status passed through to the batcher
	index   int                // position within the batch; negative means "shut down" (see worker)
	ctx     context.Context
	batcher *objectsBatcher // owning batcher; its wg is Done'd per processed job

	// async only
	chunk   *chunk       // vectors to index, valid up to chunk.cursor
	indexer batchIndexer // target vector index
	queue   *vectorQueue // source queue: deletion checks, checkpoints, chunk reuse
}
   340  
// asyncWorker consumes chunks of vectors from ch and adds them to the
// target vector index in batches, retrying failed batches every
// retryInterval until they succeed, the batch context is cancelled, or
// the channel is closed.
//
// The ids/vectors/deleted slices are reused across jobs (reset to
// length 0, capacity kept) to avoid re-allocating per chunk.
func asyncWorker(ch chan job, logger logrus.FieldLogger, retryInterval time.Duration) {
	var ids []uint64
	var vectors [][]float32
	var deleted []uint64

	for job := range ch {
		// split the chunk into live vectors to index and ids that were
		// deleted while queued
		c := job.chunk
		for i := range c.data[:c.cursor] {
			if job.queue.IsDeleted(c.data[i].id) {
				deleted = append(deleted, c.data[i].id)
			} else {
				ids = append(ids, c.data[i].id)
				vectors = append(vectors, c.data[i].vector)
			}
		}

		var err error

		if len(ids) > 0 {
		LOOP:
			for {
				err = job.indexer.AddBatch(job.ctx, ids, vectors)
				if err == nil {
					break LOOP
				}

				if errors.Is(err, context.Canceled) {
					logger.WithError(err).Debugf("skipping indexing batch due to context cancellation")
					break LOOP
				}

				logger.WithError(err).Infof("failed to index vectors, retrying in %s", retryInterval.String())

				t := time.NewTimer(retryInterval)
				select {
				case <-job.ctx.Done():
					// drain the timer
					if !t.Stop() {
						<-t.C
					}
					// NOTE(review): this return skips releaseChunk for the
					// current chunk and abandons queued jobs — confirm this
					// is acceptable on shutdown
					return
				case <-t.C:
				}
			}
		}

		// only persist checkpoint if we indexed a full batch
		if err == nil {
			job.queue.persistCheckpoint(ids)
		}

		job.queue.releaseChunk(c)

		if len(deleted) > 0 {
			job.queue.ResetDeleted(deleted...)
		}

		// reset scratch slices, keeping their capacity for the next job
		ids = ids[:0]
		vectors = vectors[:0]
		deleted = deleted[:0]
	}
}