github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted_reindexer.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package db 13 14 import ( 15 "context" 16 "fmt" 17 18 "github.com/pkg/errors" 19 "github.com/sirupsen/logrus" 20 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 21 "github.com/weaviate/weaviate/adapters/repos/db/inverted" 22 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv" 23 "github.com/weaviate/weaviate/entities/models" 24 "github.com/weaviate/weaviate/entities/schema" 25 "github.com/weaviate/weaviate/entities/storagestate" 26 "github.com/weaviate/weaviate/entities/storobj" 27 ) 28 29 type ShardInvertedReindexTask interface { 30 GetPropertiesToReindex(ctx context.Context, shard ShardLike, 31 ) ([]ReindexableProperty, error) 32 // right now only OnResume is needed, but in the future more 33 // callbacks could be added 34 // (like OnPrePauseStore, OnPostPauseStore, OnPreResumeStore, etc) 35 OnPostResumeStore(ctx context.Context, shard ShardLike) error 36 } 37 38 type ReindexableProperty struct { 39 PropertyName string 40 IndexType PropertyIndexType 41 NewIndex bool // is new index, there is no bucket to replace with 42 DesiredStrategy string 43 BucketOptions []lsmkv.BucketOption 44 } 45 46 type ShardInvertedReindexer struct { 47 logger logrus.FieldLogger 48 shard ShardLike 49 50 tasks []ShardInvertedReindexTask 51 class *models.Class 52 } 53 54 func NewShardInvertedReindexer(shard ShardLike, logger logrus.FieldLogger) *ShardInvertedReindexer { 55 class, _ := schema.GetClassByName(shard.Index().getSchema.GetSchemaSkipAuth().Objects, 56 shard.Index().Config.ClassName.String()) 57 58 return &ShardInvertedReindexer{ 59 logger: logger, 60 shard: shard, 61 tasks: []ShardInvertedReindexTask{}, 62 class: class, 63 } 64 } 65 66 func (r *ShardInvertedReindexer) AddTask(task ShardInvertedReindexTask) { 67 r.tasks = append(r.tasks, task) 68 } 69 70 func (r *ShardInvertedReindexer) Do(ctx context.Context) error { 71 for _, task := range r.tasks { 72 if err := r.checkContextExpired(ctx, "remaining tasks skipped due to context canceled"); err != nil { 73 return err 74 } 75 if err := r.doTask(ctx, task); err != nil { 76 return err 77 } 78 } 79 return nil 80 } 81 82 func (r *ShardInvertedReindexer) doTask(ctx context.Context, task ShardInvertedReindexTask) error { 83 reindexProperties, err := task.GetPropertiesToReindex(ctx, r.shard) 84 if err != nil { 85 r.logError(err, "failed getting reindex properties") 86 return errors.Wrapf(err, "failed getting reindex properties") 87 } 88 if len(reindexProperties) == 0 { 89 r.logger. 90 WithField("action", "inverted reindex"). 91 WithField("index", r.shard.Index().ID()). 92 WithField("shard", r.shard.ID()). 93 Debug("no properties to reindex") 94 return nil 95 } 96 97 if err := r.checkContextExpired(ctx, "pausing store stopped due to context canceled"); err != nil { 98 return err 99 } 100 101 if err := r.pauseStoreActivity(ctx); err != nil { 102 r.logError(err, "failed pausing store activity") 103 return err 104 } 105 106 bucketsToReindex := make([]string, len(reindexProperties)) 107 for i, reindexProperty := range reindexProperties { 108 if err := r.checkContextExpired(ctx, "creating temp buckets stopped due to context canceled"); err != nil { 109 return err 110 } 111 112 if !isIndexTypeSupportedByStrategy(reindexProperty.IndexType, reindexProperty.DesiredStrategy) { 113 err := fmt.Errorf("strategy '%s' is not supported for given index type '%d", 114 reindexProperty.DesiredStrategy, reindexProperty.IndexType) 115 r.logError(err, "invalid strategy") 116 return err 117 } 118 119 // TODO verify if property indeed need reindex before creating buckets 120 // (is filterable / is searchable / null or prop length index enabled) 121 bucketsToReindex[i] = r.bucketName(reindexProperty.PropertyName, reindexProperty.IndexType) 122 if err := r.createTempBucket(ctx, bucketsToReindex[i], reindexProperty.DesiredStrategy, 123 reindexProperty.BucketOptions...); err != nil { 124 r.logError(err, "failed creating temporary bucket") 125 return err 126 } 127 r.logger. 128 WithField("action", "inverted reindex"). 129 WithField("shard", r.shard.Name()). 130 WithField("property", reindexProperty.PropertyName). 131 WithField("strategy", reindexProperty.DesiredStrategy). 132 WithField("index_type", reindexProperty.IndexType). 133 Debug("created temporary bucket") 134 } 135 136 if err := r.reindexProperties(ctx, reindexProperties); err != nil { 137 r.logError(err, "failed reindexing properties") 138 return errors.Wrapf(err, "failed reindexing properties on shard '%s'", r.shard.Name()) 139 } 140 141 for i := range bucketsToReindex { 142 if err := r.checkContextExpired(ctx, "replacing buckets stopped due to context canceled"); err != nil { 143 return err 144 } 145 tempBucketName := helpers.TempBucketFromBucketName(bucketsToReindex[i]) 146 tempBucket := r.shard.Store().Bucket(tempBucketName) 147 tempBucket.FlushMemtable() 148 tempBucket.UpdateStatus(storagestate.StatusReadOnly) 149 150 if reindexProperties[i].NewIndex { 151 if err := r.shard.Store().RenameBucket(ctx, tempBucketName, bucketsToReindex[i]); err != nil { 152 r.logError(err, "failed renaming buckets") 153 return err 154 } 155 156 r.logger. 157 WithField("action", "inverted reindex"). 158 WithField("shard", r.shard.Name()). 159 WithField("bucket", bucketsToReindex[i]). 160 WithField("temp_bucket", tempBucketName). 161 Debug("renamed bucket") 162 } else { 163 if err := r.shard.Store().ReplaceBuckets(ctx, bucketsToReindex[i], tempBucketName); err != nil { 164 r.logError(err, "failed replacing buckets") 165 return err 166 } 167 168 r.logger. 169 WithField("action", "inverted reindex"). 170 WithField("shard", r.shard.Name()). 171 WithField("bucket", bucketsToReindex[i]). 172 WithField("temp_bucket", tempBucketName). 173 Debug("replaced buckets") 174 } 175 } 176 177 if err := r.checkContextExpired(ctx, "resuming store stopped due to context canceled"); err != nil { 178 return err 179 } 180 181 if err := r.resumeStoreActivity(ctx, task); err != nil { 182 r.logError(err, "failed resuming store activity") 183 return err 184 } 185 186 return nil 187 } 188 189 func (r *ShardInvertedReindexer) pauseStoreActivity(ctx context.Context) error { 190 if err := r.shard.Store().PauseCompaction(ctx); err != nil { 191 return errors.Wrapf(err, "failed pausing compaction for shard '%s'", r.shard.Name()) 192 } 193 if err := r.shard.Store().FlushMemtables(ctx); err != nil { 194 return errors.Wrapf(err, "failed flushing memtables for shard '%s'", r.shard.Name()) 195 } 196 r.shard.Store().UpdateBucketsStatus(storagestate.StatusReadOnly) 197 198 r.logger. 199 WithField("action", "inverted reindex"). 200 WithField("shard", r.shard.Name()). 201 Debug("paused store activity") 202 203 return nil 204 } 205 206 func (r *ShardInvertedReindexer) resumeStoreActivity(ctx context.Context, task ShardInvertedReindexTask) error { 207 if err := r.shard.Store().ResumeCompaction(ctx); err != nil { 208 return errors.Wrapf(err, "failed resuming compaction for shard '%s'", r.shard.Name()) 209 } 210 r.shard.Store().UpdateBucketsStatus(storagestate.StatusReady) 211 if err := task.OnPostResumeStore(ctx, r.shard); err != nil { 212 return errors.Wrap(err, "failed OnPostResumeStore") 213 } 214 215 r.logger. 216 WithField("action", "inverted reindex"). 217 WithField("shard", r.shard.Name()). 218 Debug("resumed store activity") 219 220 return nil 221 } 222 223 func (r *ShardInvertedReindexer) createTempBucket(ctx context.Context, name string, 224 strategy string, options ...lsmkv.BucketOption, 225 ) error { 226 tempName := helpers.TempBucketFromBucketName(name) 227 bucketOptions := append(options, lsmkv.WithStrategy(strategy)) 228 229 if err := r.shard.Store().CreateBucket(ctx, tempName, bucketOptions...); err != nil { 230 return errors.Wrapf(err, "failed creating temp bucket '%s'", tempName) 231 } 232 return nil 233 } 234 235 func (r *ShardInvertedReindexer) reindexProperties(ctx context.Context, reindexableProperties []ReindexableProperty) error { 236 checker := newReindexablePropertyChecker(reindexableProperties, r.class) 237 objectsBucket := r.shard.Store().Bucket(helpers.ObjectsBucketLSM) 238 239 r.logger. 240 WithField("action", "inverted reindex"). 241 WithField("shard", r.shard.Name()). 242 Debug("starting populating indexes") 243 244 i := 0 245 if err := objectsBucket.IterateObjects(ctx, func(object *storobj.Object) error { 246 // check context expired every 100k objects 247 if i%100_000 == 0 && i != 0 { 248 if err := r.checkContextExpired(ctx, "iterating through objects stopped due to context canceled"); err != nil { 249 return err 250 } 251 r.logger. 252 WithField("action", "inverted reindex"). 253 WithField("shard", r.shard.Name()). 254 Debugf("iterating through objects: %d done", i) 255 } 256 docID := object.DocID 257 properties, nilProperties, err := r.shard.AnalyzeObject(object) 258 if err != nil { 259 return errors.Wrapf(err, "failed analyzying object") 260 } 261 262 for _, property := range properties { 263 if err := r.handleProperty(ctx, checker, docID, property); err != nil { 264 return errors.Wrapf(err, "failed reindexing property '%s' of object '%d'", property.Name, docID) 265 } 266 } 267 for _, nilProperty := range nilProperties { 268 if err := r.handleNilProperty(ctx, checker, docID, nilProperty); err != nil { 269 return errors.Wrapf(err, "failed reindexing property '%s' of object '%d'", nilProperty.Name, docID) 270 } 271 } 272 273 i++ 274 return nil 275 }); err != nil { 276 return err 277 } 278 279 r.logger. 280 WithField("action", "inverted reindex"). 281 WithField("shard", r.shard.Name()). 282 Debugf("iterating through objects: %d done", i) 283 284 return nil 285 } 286 287 func (r *ShardInvertedReindexer) handleProperty(ctx context.Context, checker *reindexablePropertyChecker, 288 docID uint64, property inverted.Property, 289 ) error { 290 reindexablePropValue := checker.isReindexable(property.Name, IndexTypePropValue) 291 reindexablePropSearchableValue := checker.isReindexable(property.Name, IndexTypePropSearchableValue) 292 293 if reindexablePropValue || reindexablePropSearchableValue { 294 schemaProp := checker.getSchemaProp(property.Name) 295 296 var bucketValue, bucketSearchableValue *lsmkv.Bucket 297 298 if reindexablePropValue { 299 bucketValue = r.tempBucket(property.Name, IndexTypePropValue) 300 if bucketValue == nil { 301 return fmt.Errorf("no bucket for prop '%s' value found", property.Name) 302 } 303 } 304 if reindexablePropSearchableValue { 305 bucketSearchableValue = r.tempBucket(property.Name, IndexTypePropSearchableValue) 306 if bucketSearchableValue == nil { 307 return fmt.Errorf("no bucket searchable for prop '%s' value found", property.Name) 308 } 309 } 310 311 propLen := float32(len(property.Items)) 312 for _, item := range property.Items { 313 key := item.Data 314 if reindexablePropSearchableValue && inverted.HasSearchableIndex(schemaProp) { 315 pair := r.shard.pairPropertyWithFrequency(docID, item.TermFrequency, propLen) 316 if err := r.shard.addToPropertyMapBucket(bucketSearchableValue, pair, key); err != nil { 317 return errors.Wrapf(err, "failed adding to prop '%s' value bucket", property.Name) 318 } 319 } 320 if reindexablePropValue && inverted.HasFilterableIndex(schemaProp) { 321 if err := r.shard.addToPropertySetBucket(bucketValue, docID, key); err != nil { 322 return errors.Wrapf(err, "failed adding to prop '%s' value bucket", property.Name) 323 } 324 } 325 } 326 } 327 328 // add non-nil properties to the null-state inverted index, 329 // but skip internal properties (__meta_count, _id etc) 330 if isMetaCountProperty(property) || isInternalProperty(property) { 331 return nil 332 } 333 334 // properties where defining a length does not make sense (floats etc.) have a negative entry as length 335 if r.shard.Index().invertedIndexConfig.IndexPropertyLength && property.Length >= 0 { 336 key, err := bucketKeyPropertyLength(property.Length) 337 if err != nil { 338 return errors.Wrapf(err, "failed creating key for prop '%s' length", property.Name) 339 } 340 if checker.isReindexable(property.Name, IndexTypePropLength) { 341 bucketLength := r.tempBucket(property.Name, IndexTypePropLength) 342 if bucketLength == nil { 343 return fmt.Errorf("no bucket for prop '%s' length found", property.Name) 344 } 345 if err := r.shard.addToPropertySetBucket(bucketLength, docID, key); err != nil { 346 return errors.Wrapf(err, "failed adding to prop '%s' length bucket", property.Name) 347 } 348 } 349 } 350 351 if r.shard.Index().invertedIndexConfig.IndexNullState { 352 key, err := bucketKeyPropertyNull(property.Length == 0) 353 if err != nil { 354 return errors.Wrapf(err, "failed creating key for prop '%s' null", property.Name) 355 } 356 if checker.isReindexable(property.Name, IndexTypePropNull) { 357 bucketNull := r.tempBucket(property.Name, IndexTypePropNull) 358 if bucketNull == nil { 359 return fmt.Errorf("no bucket for prop '%s' null found", property.Name) 360 } 361 if err := r.shard.addToPropertySetBucket(bucketNull, docID, key); err != nil { 362 return errors.Wrapf(err, "failed adding to prop '%s' null bucket", property.Name) 363 } 364 } 365 } 366 367 return nil 368 } 369 370 func (r *ShardInvertedReindexer) handleNilProperty(ctx context.Context, checker *reindexablePropertyChecker, 371 docID uint64, nilProperty inverted.NilProperty, 372 ) error { 373 if r.shard.Index().invertedIndexConfig.IndexPropertyLength && nilProperty.AddToPropertyLength { 374 key, err := bucketKeyPropertyLength(0) 375 if err != nil { 376 return errors.Wrapf(err, "failed creating key for prop '%s' length", nilProperty.Name) 377 } 378 if checker.isReindexable(nilProperty.Name, IndexTypePropLength) { 379 bucketLength := r.tempBucket(nilProperty.Name, IndexTypePropLength) 380 if bucketLength == nil { 381 return fmt.Errorf("no bucket for prop '%s' length found", nilProperty.Name) 382 } 383 if err := r.shard.addToPropertySetBucket(bucketLength, docID, key); err != nil { 384 return errors.Wrapf(err, "failed adding to prop '%s' length bucket", nilProperty.Name) 385 } 386 } 387 } 388 389 if r.shard.Index().invertedIndexConfig.IndexNullState { 390 key, err := bucketKeyPropertyNull(true) 391 if err != nil { 392 return errors.Wrapf(err, "failed creating key for prop '%s' null", nilProperty.Name) 393 } 394 if checker.isReindexable(nilProperty.Name, IndexTypePropNull) { 395 bucketNull := r.tempBucket(nilProperty.Name, IndexTypePropNull) 396 if bucketNull == nil { 397 return fmt.Errorf("no bucket for prop '%s' null found", nilProperty.Name) 398 } 399 if err := r.shard.addToPropertySetBucket(bucketNull, docID, key); err != nil { 400 return errors.Wrapf(err, "failed adding to prop '%s' null bucket", nilProperty.Name) 401 } 402 } 403 } 404 405 return nil 406 } 407 408 func (r *ShardInvertedReindexer) bucketName(propName string, indexType PropertyIndexType) string { 409 checkSupportedPropertyIndexType(indexType) 410 411 switch indexType { 412 case IndexTypePropValue: 413 return helpers.BucketFromPropNameLSM(propName) 414 case IndexTypePropSearchableValue: 415 return helpers.BucketSearchableFromPropNameLSM(propName) 416 case IndexTypePropLength: 417 return helpers.BucketFromPropNameLengthLSM(propName) 418 case IndexTypePropNull: 419 return helpers.BucketFromPropNameNullLSM(propName) 420 default: 421 return "" 422 } 423 } 424 425 func (r *ShardInvertedReindexer) tempBucket(propName string, indexType PropertyIndexType) *lsmkv.Bucket { 426 tempBucketName := helpers.TempBucketFromBucketName(r.bucketName(propName, indexType)) 427 return r.shard.Store().Bucket(tempBucketName) 428 } 429 430 func (r *ShardInvertedReindexer) checkContextExpired(ctx context.Context, msg string) error { 431 if ctx.Err() != nil { 432 r.logError(ctx.Err(), msg) 433 return errors.Wrapf(ctx.Err(), msg) 434 } 435 return nil 436 } 437 438 func (r *ShardInvertedReindexer) logError(err error, msg string, args ...interface{}) { 439 r.logger. 440 WithField("action", "inverted reindex"). 441 WithField("shard", r.shard.Name()). 442 WithError(err). 443 Errorf(msg, args...) 444 }