github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/init.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package db
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"os"
    18  	"path"
    19  	"time"
    20  
    21  	enterrors "github.com/weaviate/weaviate/entities/errors"
    22  
    23  	"github.com/pkg/errors"
    24  	"github.com/weaviate/weaviate/adapters/repos/db/indexcheckpoint"
    25  	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
    26  	"github.com/weaviate/weaviate/entities/models"
    27  	"github.com/weaviate/weaviate/entities/schema"
    28  	"github.com/weaviate/weaviate/usecases/config"
    29  	"github.com/weaviate/weaviate/usecases/replica"
    30  	migratefs "github.com/weaviate/weaviate/usecases/schema/migrate/fs"
    31  )
    32  
    33  // init gets the current schema and creates one index object per class.
    34  // The indices will in turn create shards, which will either read an
    35  // existing db file from disk, or create a new one if none exists
    36  func (db *DB) init(ctx context.Context) error {
    37  	if err := os.MkdirAll(db.config.RootPath, 0o777); err != nil {
    38  		return fmt.Errorf("create root path directory at %s: %w", db.config.RootPath, err)
    39  	}
    40  
    41  	// As of v1.22, db files are stored in a hierarchical structure
    42  	// rather than a flat one. If weaviate is started with files
    43  	// that are still in the flat structure, we will migrate them
    44  	// over.
    45  	if err := db.migrateFileStructureIfNecessary(); err != nil {
    46  		return err
    47  	}
    48  
    49  	if asyncEnabled() {
    50  		// init the index checkpoint file
    51  		var err error
    52  		db.indexCheckpoints, err = indexcheckpoint.New(db.config.RootPath, db.logger)
    53  		if err != nil {
    54  			return errors.Wrap(err, "init index checkpoint")
    55  		}
    56  	}
    57  
    58  	objects := db.schemaGetter.GetSchemaSkipAuth().Objects
    59  	if objects != nil {
    60  		for _, class := range objects.Classes {
    61  			invertedConfig := class.InvertedIndexConfig
    62  			if invertedConfig == nil {
    63  				// for backward compatibility, this field was introduced in v1.0.4,
    64  				// prior schemas will not yet have the field. Init with the defaults
    65  				// which were previously hard-coded.
    66  				// In this method we are essentially reading the schema from disk, so
    67  				// it could have been created before v1.0.4
    68  				invertedConfig = &models.InvertedIndexConfig{
    69  					CleanupIntervalSeconds: config.DefaultCleanupIntervalSeconds,
    70  					Bm25: &models.BM25Config{
    71  						K1: config.DefaultBM25k1,
    72  						B:  config.DefaultBM25b,
    73  					},
    74  				}
    75  			}
    76  			if err := replica.ValidateConfig(class, db.config.Replication); err != nil {
    77  				return fmt.Errorf("replication config: %w", err)
    78  			}
    79  
    80  			idx, err := NewIndex(ctx, IndexConfig{
    81  				ClassName:                 schema.ClassName(class.Class),
    82  				RootPath:                  db.config.RootPath,
    83  				ResourceUsage:             db.config.ResourceUsage,
    84  				QueryMaximumResults:       db.config.QueryMaximumResults,
    85  				QueryNestedRefLimit:       db.config.QueryNestedRefLimit,
    86  				MemtablesFlushDirtyAfter:  db.config.MemtablesFlushDirtyAfter,
    87  				MemtablesInitialSizeMB:    db.config.MemtablesInitialSizeMB,
    88  				MemtablesMaxSizeMB:        db.config.MemtablesMaxSizeMB,
    89  				MemtablesMinActiveSeconds: db.config.MemtablesMinActiveSeconds,
    90  				MemtablesMaxActiveSeconds: db.config.MemtablesMaxActiveSeconds,
    91  				TrackVectorDimensions:     db.config.TrackVectorDimensions,
    92  				AvoidMMap:                 db.config.AvoidMMap,
    93  				DisableLazyLoadShards:     db.config.DisableLazyLoadShards,
    94  				ReplicationFactor:         class.ReplicationConfig.Factor,
    95  			}, db.schemaGetter.CopyShardingState(class.Class),
    96  				inverted.ConfigFromModel(invertedConfig),
    97  				convertToVectorIndexConfig(class.VectorIndexConfig),
    98  				convertToVectorIndexConfigs(class.VectorConfig),
    99  				db.schemaGetter, db, db.logger, db.nodeResolver, db.remoteIndex,
   100  				db.replicaClient, db.promMetrics, class, db.jobQueueCh, db.indexCheckpoints)
   101  			if err != nil {
   102  				return errors.Wrap(err, "create index")
   103  			}
   104  
   105  			db.indexLock.Lock()
   106  			db.indices[idx.ID()] = idx
   107  			db.indexLock.Unlock()
   108  		}
   109  	}
   110  
   111  	// If metrics aren't grouped, there is no need to observe node-wide metrics
   112  	// asynchronously. In that case, each shard could track its own metrics with
   113  	// a unique label. It is only when we conflate all collections/shards into
   114  	// "n/a" that we need to actively aggregate node-wide metrics.
   115  	//
   116  	// See also https://github.com/weaviate/weaviate/issues/4396
   117  	if db.promMetrics != nil && db.promMetrics.Group {
   118  		db.metricsObserver = newNodeWideMetricsObserver(db)
   119  		enterrors.GoWrapper(func() { db.metricsObserver.Start() }, db.logger)
   120  	}
   121  
   122  	return nil
   123  }
   124  
   125  func (db *DB) migrateFileStructureIfNecessary() error {
   126  	fsMigrationPath := path.Join(db.config.RootPath, "migration1.22.fs.hierarchy")
   127  	exists, err := fileExists(fsMigrationPath)
   128  	if err != nil {
   129  		return err
   130  	}
   131  	if !exists {
   132  		if err = db.migrateToHierarchicalFS(); err != nil {
   133  			return fmt.Errorf("migrate to hierarchical fs: %w", err)
   134  		}
   135  		if _, err = os.Create(fsMigrationPath); err != nil {
   136  			return fmt.Errorf("create hierarchical fs indicator: %w", err)
   137  		}
   138  	}
   139  	return nil
   140  }
   141  
   142  func (db *DB) migrateToHierarchicalFS() error {
   143  	before := time.Now()
   144  
   145  	if err := migratefs.MigrateToHierarchicalFS(db.config.RootPath, db.schemaGetter); err != nil {
   146  		return err
   147  	}
   148  	db.logger.WithField("action", "hierarchical_fs_migration").
   149  		Debugf("fs migration took %s\n", time.Since(before))
   150  	return nil
   151  }
   152  
   153  func fileExists(file string) (bool, error) {
   154  	_, err := os.Stat(file)
   155  	if os.IsNotExist(err) {
   156  		return false, nil
   157  	}
   158  	if err != nil {
   159  		return false, err
   160  	}
   161  	return true, nil
   162  }