github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/init.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package db 13 14 import ( 15 "context" 16 "fmt" 17 "os" 18 "path" 19 "time" 20 21 enterrors "github.com/weaviate/weaviate/entities/errors" 22 23 "github.com/pkg/errors" 24 "github.com/weaviate/weaviate/adapters/repos/db/indexcheckpoint" 25 "github.com/weaviate/weaviate/adapters/repos/db/inverted" 26 "github.com/weaviate/weaviate/entities/models" 27 "github.com/weaviate/weaviate/entities/schema" 28 "github.com/weaviate/weaviate/usecases/config" 29 "github.com/weaviate/weaviate/usecases/replica" 30 migratefs "github.com/weaviate/weaviate/usecases/schema/migrate/fs" 31 ) 32 33 // init gets the current schema and creates one index object per class. 34 // The indices will in turn create shards, which will either read an 35 // existing db file from disk, or create a new one if none exists 36 func (db *DB) init(ctx context.Context) error { 37 if err := os.MkdirAll(db.config.RootPath, 0o777); err != nil { 38 return fmt.Errorf("create root path directory at %s: %w", db.config.RootPath, err) 39 } 40 41 // As of v1.22, db files are stored in a hierarchical structure 42 // rather than a flat one. If weaviate is started with files 43 // that are still in the flat structure, we will migrate them 44 // over. 45 if err := db.migrateFileStructureIfNecessary(); err != nil { 46 return err 47 } 48 49 if asyncEnabled() { 50 // init the index checkpoint file 51 var err error 52 db.indexCheckpoints, err = indexcheckpoint.New(db.config.RootPath, db.logger) 53 if err != nil { 54 return errors.Wrap(err, "init index checkpoint") 55 } 56 } 57 58 objects := db.schemaGetter.GetSchemaSkipAuth().Objects 59 if objects != nil { 60 for _, class := range objects.Classes { 61 invertedConfig := class.InvertedIndexConfig 62 if invertedConfig == nil { 63 // for backward compatibility, this field was introduced in v1.0.4, 64 // prior schemas will not yet have the field. Init with the defaults 65 // which were previously hard-coded. 66 // In this method we are essentially reading the schema from disk, so 67 // it could have been created before v1.0.4 68 invertedConfig = &models.InvertedIndexConfig{ 69 CleanupIntervalSeconds: config.DefaultCleanupIntervalSeconds, 70 Bm25: &models.BM25Config{ 71 K1: config.DefaultBM25k1, 72 B: config.DefaultBM25b, 73 }, 74 } 75 } 76 if err := replica.ValidateConfig(class, db.config.Replication); err != nil { 77 return fmt.Errorf("replication config: %w", err) 78 } 79 80 idx, err := NewIndex(ctx, IndexConfig{ 81 ClassName: schema.ClassName(class.Class), 82 RootPath: db.config.RootPath, 83 ResourceUsage: db.config.ResourceUsage, 84 QueryMaximumResults: db.config.QueryMaximumResults, 85 QueryNestedRefLimit: db.config.QueryNestedRefLimit, 86 MemtablesFlushDirtyAfter: db.config.MemtablesFlushDirtyAfter, 87 MemtablesInitialSizeMB: db.config.MemtablesInitialSizeMB, 88 MemtablesMaxSizeMB: db.config.MemtablesMaxSizeMB, 89 MemtablesMinActiveSeconds: db.config.MemtablesMinActiveSeconds, 90 MemtablesMaxActiveSeconds: db.config.MemtablesMaxActiveSeconds, 91 TrackVectorDimensions: db.config.TrackVectorDimensions, 92 AvoidMMap: db.config.AvoidMMap, 93 DisableLazyLoadShards: db.config.DisableLazyLoadShards, 94 ReplicationFactor: class.ReplicationConfig.Factor, 95 }, db.schemaGetter.CopyShardingState(class.Class), 96 inverted.ConfigFromModel(invertedConfig), 97 convertToVectorIndexConfig(class.VectorIndexConfig), 98 convertToVectorIndexConfigs(class.VectorConfig), 99 db.schemaGetter, db, db.logger, db.nodeResolver, db.remoteIndex, 100 db.replicaClient, db.promMetrics, class, db.jobQueueCh, db.indexCheckpoints) 101 if err != nil { 102 return errors.Wrap(err, "create index") 103 } 104 105 db.indexLock.Lock() 106 db.indices[idx.ID()] = idx 107 db.indexLock.Unlock() 108 } 109 } 110 111 // If metrics aren't grouped, there is no need to observe node-wide metrics 112 // asynchronously. In that case, each shard could track its own metrics with 113 // a unique label. It is only when we conflate all collections/shards into 114 // "n/a" that we need to actively aggregate node-wide metrics. 115 // 116 // See also https://github.com/weaviate/weaviate/issues/4396 117 if db.promMetrics != nil && db.promMetrics.Group { 118 db.metricsObserver = newNodeWideMetricsObserver(db) 119 enterrors.GoWrapper(func() { db.metricsObserver.Start() }, db.logger) 120 } 121 122 return nil 123 } 124 125 func (db *DB) migrateFileStructureIfNecessary() error { 126 fsMigrationPath := path.Join(db.config.RootPath, "migration1.22.fs.hierarchy") 127 exists, err := fileExists(fsMigrationPath) 128 if err != nil { 129 return err 130 } 131 if !exists { 132 if err = db.migrateToHierarchicalFS(); err != nil { 133 return fmt.Errorf("migrate to hierarchical fs: %w", err) 134 } 135 if _, err = os.Create(fsMigrationPath); err != nil { 136 return fmt.Errorf("create hierarchical fs indicator: %w", err) 137 } 138 } 139 return nil 140 } 141 142 func (db *DB) migrateToHierarchicalFS() error { 143 before := time.Now() 144 145 if err := migratefs.MigrateToHierarchicalFS(db.config.RootPath, db.schemaGetter); err != nil { 146 return err 147 } 148 db.logger.WithField("action", "hierarchical_fs_migration"). 149 Debugf("fs migration took %s\n", time.Since(before)) 150 return nil 151 } 152 153 func fileExists(file string) (bool, error) { 154 _, err := os.Stat(file) 155 if os.IsNotExist(err) { 156 return false, nil 157 } 158 if err != nil { 159 return false, err 160 } 161 return true, nil 162 }