github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/shard_lazyloader.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package db 13 14 import ( 15 "context" 16 "errors" 17 "fmt" 18 "io" 19 "os" 20 "sync" 21 22 enterrors "github.com/weaviate/weaviate/entities/errors" 23 24 "github.com/go-openapi/strfmt" 25 "github.com/weaviate/weaviate/adapters/repos/db/indexcheckpoint" 26 "github.com/weaviate/weaviate/adapters/repos/db/indexcounter" 27 "github.com/weaviate/weaviate/adapters/repos/db/inverted" 28 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv" 29 "github.com/weaviate/weaviate/entities/additional" 30 "github.com/weaviate/weaviate/entities/aggregation" 31 "github.com/weaviate/weaviate/entities/backup" 32 "github.com/weaviate/weaviate/entities/filters" 33 "github.com/weaviate/weaviate/entities/models" 34 "github.com/weaviate/weaviate/entities/multi" 35 "github.com/weaviate/weaviate/entities/schema" 36 "github.com/weaviate/weaviate/entities/search" 37 "github.com/weaviate/weaviate/entities/searchparams" 38 "github.com/weaviate/weaviate/entities/storagestate" 39 "github.com/weaviate/weaviate/entities/storobj" 40 "github.com/weaviate/weaviate/usecases/monitoring" 41 "github.com/weaviate/weaviate/usecases/objects" 42 "github.com/weaviate/weaviate/usecases/replica" 43 ) 44 45 type LazyLoadShard struct { 46 shardOpts *deferredShardOpts 47 shard *Shard 48 loaded bool 49 mutex sync.Mutex 50 } 51 52 func NewLazyLoadShard(ctx context.Context, promMetrics *monitoring.PrometheusMetrics, 53 shardName string, index *Index, class *models.Class, jobQueueCh chan job, 54 indexCheckpoints *indexcheckpoint.Checkpoints, 55 ) *LazyLoadShard { 56 promMetrics.NewUnloadedshard(class.Class) 57 return &LazyLoadShard{ 58 shardOpts: &deferredShardOpts{ 59 promMetrics: promMetrics, 60 name: shardName, 61 index: index, 62 class: class, 63 jobQueueCh: jobQueueCh, 64 indexCheckpoints: indexCheckpoints, 65 }, 66 } 67 } 68 69 type deferredShardOpts struct { 70 promMetrics *monitoring.PrometheusMetrics 71 name string 72 index *Index 73 class *models.Class 74 jobQueueCh chan job 75 indexCheckpoints *indexcheckpoint.Checkpoints 76 } 77 78 func (l *LazyLoadShard) mustLoad() { 79 l.mustLoadCtx(context.Background()) 80 } 81 82 func (l *LazyLoadShard) mustLoadCtx(ctx context.Context) { 83 if err := l.Load(ctx); err != nil { 84 panic(err.Error()) 85 } 86 } 87 88 func (l *LazyLoadShard) Load(ctx context.Context) error { 89 l.mutex.Lock() 90 defer l.mutex.Unlock() 91 92 if l.loaded { 93 return nil 94 } 95 if l.shardOpts.class == nil { 96 l.shardOpts.promMetrics.StartLoadingShard("unknown class") 97 } else { 98 l.shardOpts.promMetrics.StartLoadingShard(l.shardOpts.class.Class) 99 } 100 shard, err := NewShard(ctx, l.shardOpts.promMetrics, l.shardOpts.name, l.shardOpts.index, 101 l.shardOpts.class, l.shardOpts.jobQueueCh, l.shardOpts.indexCheckpoints) 102 if err != nil { 103 msg := fmt.Sprintf("Unable to load shard %s: %v", l.shardOpts.name, err) 104 l.shardOpts.index.logger.WithField("error", "shard_load").WithError(err).Error(msg) 105 return errors.New(msg) 106 } 107 l.shard = shard 108 l.loaded = true 109 if l.shardOpts.class == nil { 110 l.shardOpts.promMetrics.FinishLoadingShard("unknown class") 111 } else { 112 l.shardOpts.promMetrics.FinishLoadingShard(l.shardOpts.class.Class) 113 } 114 return nil 115 } 116 117 func (l *LazyLoadShard) Index() *Index { 118 return l.shardOpts.index 119 } 120 121 func (l *LazyLoadShard) Name() string { 122 return l.shardOpts.name 123 } 124 125 func (l *LazyLoadShard) Store() *lsmkv.Store { 126 l.mustLoad() 127 return l.shard.Store() 128 } 129 130 func (l *LazyLoadShard) NotifyReady() { 131 l.mustLoad() 132 l.shard.NotifyReady() 133 } 134 135 func (l *LazyLoadShard) GetStatus() storagestate.Status { 136 l.mustLoad() 137 return l.shard.GetStatus() 138 } 139 140 func (l *LazyLoadShard) UpdateStatus(status string) error { 141 l.mustLoad() 142 return l.shard.UpdateStatus(status) 143 } 144 145 func (l *LazyLoadShard) FindUUIDs(ctx context.Context, filters *filters.LocalFilter) ([]strfmt.UUID, error) { 146 if err := l.Load(ctx); err != nil { 147 return []strfmt.UUID{}, err 148 } 149 return l.shard.FindUUIDs(ctx, filters) 150 } 151 152 func (l *LazyLoadShard) Counter() *indexcounter.Counter { 153 l.mustLoad() 154 return l.shard.Counter() 155 } 156 157 func (l *LazyLoadShard) ObjectCount() int { 158 l.mustLoad() 159 return l.shard.ObjectCount() 160 } 161 162 func (l *LazyLoadShard) ObjectCountAsync() int { 163 l.mutex.Lock() 164 if !l.loaded { 165 l.mutex.Unlock() 166 return 0 167 } 168 l.mutex.Unlock() 169 return l.shard.ObjectCountAsync() 170 } 171 172 func (l *LazyLoadShard) GetPropertyLengthTracker() *inverted.JsonPropertyLengthTracker { 173 l.mustLoad() 174 return l.shard.GetPropertyLengthTracker() 175 } 176 177 func (l *LazyLoadShard) PutObject(ctx context.Context, object *storobj.Object) error { 178 if err := l.Load(ctx); err != nil { 179 return err 180 } 181 return l.shard.PutObject(ctx, object) 182 } 183 184 func (l *LazyLoadShard) PutObjectBatch(ctx context.Context, objects []*storobj.Object) []error { 185 if err := l.Load(ctx); err != nil { 186 return []error{err} 187 } // TODO check 188 return l.shard.PutObjectBatch(ctx, objects) 189 } 190 191 func (l *LazyLoadShard) ObjectByID(ctx context.Context, id strfmt.UUID, props search.SelectProperties, additional additional.Properties) (*storobj.Object, error) { 192 if err := l.Load(ctx); err != nil { 193 return nil, err 194 } 195 return l.shard.ObjectByID(ctx, id, props, additional) 196 } 197 198 func (l *LazyLoadShard) Exists(ctx context.Context, id strfmt.UUID) (bool, error) { 199 if err := l.Load(ctx); err != nil { 200 return false, err 201 } 202 return l.shard.Exists(ctx, id) 203 } 204 205 func (l *LazyLoadShard) ObjectSearch(ctx context.Context, limit int, filters *filters.LocalFilter, keywordRanking *searchparams.KeywordRanking, sort []filters.Sort, cursor *filters.Cursor, additional additional.Properties) ([]*storobj.Object, []float32, error) { 206 if err := l.Load(ctx); err != nil { 207 return nil, nil, err 208 } 209 return l.shard.ObjectSearch(ctx, limit, filters, keywordRanking, sort, cursor, additional) 210 } 211 212 func (l *LazyLoadShard) ObjectVectorSearch(ctx context.Context, searchVector []float32, targetVector string, targetDist float32, limit int, filters *filters.LocalFilter, sort []filters.Sort, groupBy *searchparams.GroupBy, additional additional.Properties) ([]*storobj.Object, []float32, error) { 213 if err := l.Load(ctx); err != nil { 214 return nil, nil, err 215 } 216 return l.shard.ObjectVectorSearch(ctx, searchVector, targetVector, targetDist, limit, filters, sort, groupBy, additional) 217 } 218 219 func (l *LazyLoadShard) UpdateVectorIndexConfig(ctx context.Context, updated schema.VectorIndexConfig) error { 220 if err := l.Load(ctx); err != nil { 221 return err 222 } 223 return l.shard.UpdateVectorIndexConfig(ctx, updated) 224 } 225 226 func (l *LazyLoadShard) UpdateVectorIndexConfigs(ctx context.Context, updated map[string]schema.VectorIndexConfig) error { 227 if err := l.Load(ctx); err != nil { 228 return err 229 } 230 return l.shard.UpdateVectorIndexConfigs(ctx, updated) 231 } 232 233 func (l *LazyLoadShard) AddReferencesBatch(ctx context.Context, refs objects.BatchReferences) []error { 234 if err := l.Load(ctx); err != nil { 235 return []error{err} 236 } // TODO check 237 return l.shard.AddReferencesBatch(ctx, refs) 238 } 239 240 func (l *LazyLoadShard) DeleteObjectBatch(ctx context.Context, ids []strfmt.UUID, dryRun bool) objects.BatchSimpleObjects { 241 l.mustLoadCtx(ctx) 242 return l.shard.DeleteObjectBatch(ctx, ids, dryRun) 243 } 244 245 func (l *LazyLoadShard) DeleteObject(ctx context.Context, id strfmt.UUID) error { 246 if err := l.Load(ctx); err != nil { 247 return err 248 } 249 return l.shard.DeleteObject(ctx, id) 250 } 251 252 func (l *LazyLoadShard) MultiObjectByID(ctx context.Context, query []multi.Identifier) ([]*storobj.Object, error) { 253 if err := l.Load(ctx); err != nil { 254 return nil, err 255 } 256 return l.shard.MultiObjectByID(ctx, query) 257 } 258 259 func (l *LazyLoadShard) ID() string { 260 return shardId(l.shardOpts.index.ID(), l.shardOpts.name) 261 } 262 263 func (l *LazyLoadShard) drop() error { 264 // if not loaded, execute simplified drop without loading shard: 265 // - perform required actions 266 // - remove entire shard directory 267 // use lock to prevent eventual concurrent droping and loading 268 l.mutex.Lock() 269 if !l.loaded { 270 defer l.mutex.Unlock() 271 272 idx := l.shardOpts.index 273 className := idx.Config.ClassName.String() 274 shardName := l.shardOpts.name 275 276 // cleanup metrics 277 NewMetrics(idx.logger, l.shardOpts.promMetrics, className, shardName). 278 DeleteShardLabels(className, shardName) 279 280 // cleanup dimensions 281 if idx.Config.TrackVectorDimensions { 282 clearDimensionMetrics(l.shardOpts.promMetrics, className, shardName, 283 idx.vectorIndexUserConfig, idx.vectorIndexUserConfigs) 284 } 285 286 // cleanup queue 287 if l.shardOpts.indexCheckpoints != nil { 288 if err := l.shardOpts.indexCheckpoints.Drop(); err != nil { 289 return fmt.Errorf("delete checkpoint: %w", err) 290 } 291 } 292 293 // remove shard dir 294 if err := os.RemoveAll(shardPath(idx.path(), shardName)); err != nil { 295 return fmt.Errorf("delete shard dir: %w", err) 296 } 297 298 return nil 299 } 300 l.mutex.Unlock() 301 302 return l.shard.drop() 303 } 304 305 func (l *LazyLoadShard) addIDProperty(ctx context.Context) error { 306 if err := l.Load(ctx); err != nil { 307 return err 308 } 309 return l.shard.addIDProperty(ctx) 310 } 311 312 func (l *LazyLoadShard) addDimensionsProperty(ctx context.Context) error { 313 if err := l.Load(ctx); err != nil { 314 return err 315 } 316 return l.shard.addDimensionsProperty(ctx) 317 } 318 319 func (l *LazyLoadShard) addTimestampProperties(ctx context.Context) error { 320 if err := l.Load(ctx); err != nil { 321 return err 322 } 323 return l.shard.addTimestampProperties(ctx) 324 } 325 326 func (l *LazyLoadShard) createPropertyIndex(ctx context.Context, prop *models.Property, eg *enterrors.ErrorGroupWrapper) { 327 l.mustLoad() 328 l.shard.createPropertyIndex(ctx, prop, eg) 329 } 330 331 func (l *LazyLoadShard) BeginBackup(ctx context.Context) error { 332 if err := l.Load(ctx); err != nil { 333 return err 334 } 335 return l.shard.BeginBackup(ctx) 336 } 337 338 func (l *LazyLoadShard) ListBackupFiles(ctx context.Context, ret *backup.ShardDescriptor) error { 339 if err := l.Load(ctx); err != nil { 340 return err 341 } 342 return l.shard.ListBackupFiles(ctx, ret) 343 } 344 345 func (l *LazyLoadShard) resumeMaintenanceCycles(ctx context.Context) error { 346 if err := l.Load(ctx); err != nil { 347 return err 348 } 349 return l.shard.resumeMaintenanceCycles(ctx) 350 } 351 352 func (l *LazyLoadShard) SetPropertyLengths(props []inverted.Property) error { 353 l.mustLoad() 354 return l.shard.SetPropertyLengths(props) 355 } 356 357 func (l *LazyLoadShard) AnalyzeObject(object *storobj.Object) ([]inverted.Property, []inverted.NilProperty, error) { 358 l.mustLoad() 359 return l.shard.AnalyzeObject(object) 360 } 361 362 func (l *LazyLoadShard) Dimensions() int { 363 l.mustLoad() 364 return l.shard.Dimensions() 365 } 366 367 func (l *LazyLoadShard) QuantizedDimensions(segments int) int { 368 l.mustLoad() 369 return l.shard.QuantizedDimensions(segments) 370 } 371 372 func (l *LazyLoadShard) publishDimensionMetrics() { 373 l.mustLoad() 374 l.shard.publishDimensionMetrics() 375 } 376 377 func (l *LazyLoadShard) Aggregate(ctx context.Context, params aggregation.Params) (*aggregation.Result, error) { 378 if err := l.Load(ctx); err != nil { 379 return nil, err 380 } 381 return l.shard.Aggregate(ctx, params) 382 } 383 384 func (l *LazyLoadShard) MergeObject(ctx context.Context, object objects.MergeDocument) error { 385 if err := l.Load(ctx); err != nil { 386 return err 387 } 388 return l.shard.MergeObject(ctx, object) 389 } 390 391 func (l *LazyLoadShard) Queue() *IndexQueue { 392 l.mustLoad() 393 return l.shard.Queue() 394 } 395 396 func (l *LazyLoadShard) Queues() map[string]*IndexQueue { 397 l.mustLoad() 398 return l.shard.Queues() 399 } 400 401 func (l *LazyLoadShard) Shutdown(ctx context.Context) error { 402 if !l.isLoaded() { 403 return nil 404 } 405 return l.shard.Shutdown(ctx) 406 } 407 408 func (l *LazyLoadShard) ObjectList(ctx context.Context, limit int, sort []filters.Sort, cursor *filters.Cursor, additional additional.Properties, className schema.ClassName) ([]*storobj.Object, error) { 409 if err := l.Load(ctx); err != nil { 410 return nil, err 411 } 412 return l.shard.ObjectList(ctx, limit, sort, cursor, additional, className) 413 } 414 415 func (l *LazyLoadShard) WasDeleted(ctx context.Context, id strfmt.UUID) (bool, error) { 416 if err := l.Load(ctx); err != nil { 417 return false, err 418 } 419 return l.shard.WasDeleted(ctx, id) 420 } 421 422 func (l *LazyLoadShard) VectorIndex() VectorIndex { 423 l.mustLoad() 424 return l.shard.VectorIndex() 425 } 426 427 func (l *LazyLoadShard) VectorIndexes() map[string]VectorIndex { 428 l.mustLoad() 429 return l.shard.VectorIndexes() 430 } 431 432 func (l *LazyLoadShard) hasTargetVectors() bool { 433 l.mustLoad() 434 return l.shard.hasTargetVectors() 435 } 436 437 func (l *LazyLoadShard) Versioner() *shardVersioner { 438 l.mustLoad() 439 return l.shard.Versioner() 440 } 441 442 func (l *LazyLoadShard) isReadOnly() bool { 443 l.mustLoad() 444 return l.shard.isReadOnly() 445 } 446 447 func (l *LazyLoadShard) preparePutObject(ctx context.Context, shardID string, object *storobj.Object) replica.SimpleResponse { 448 l.mustLoadCtx(ctx) 449 return l.shard.preparePutObject(ctx, shardID, object) 450 } 451 452 func (l *LazyLoadShard) preparePutObjects(ctx context.Context, shardID string, objects []*storobj.Object) replica.SimpleResponse { 453 l.mustLoadCtx(ctx) 454 return l.shard.preparePutObjects(ctx, shardID, objects) 455 } 456 457 func (l *LazyLoadShard) prepareMergeObject(ctx context.Context, shardID string, object *objects.MergeDocument) replica.SimpleResponse { 458 l.mustLoadCtx(ctx) 459 return l.shard.prepareMergeObject(ctx, shardID, object) 460 } 461 462 func (l *LazyLoadShard) prepareDeleteObject(ctx context.Context, shardID string, id strfmt.UUID) replica.SimpleResponse { 463 l.mustLoadCtx(ctx) 464 return l.shard.prepareDeleteObject(ctx, shardID, id) 465 } 466 467 func (l *LazyLoadShard) prepareDeleteObjects(ctx context.Context, shardID string, ids []strfmt.UUID, dryRun bool) replica.SimpleResponse { 468 l.mustLoadCtx(ctx) 469 return l.shard.prepareDeleteObjects(ctx, shardID, ids, dryRun) 470 } 471 472 func (l *LazyLoadShard) prepareAddReferences(ctx context.Context, shardID string, refs []objects.BatchReference) replica.SimpleResponse { 473 l.mustLoadCtx(ctx) 474 return l.shard.prepareAddReferences(ctx, shardID, refs) 475 } 476 477 func (l *LazyLoadShard) commitReplication(ctx context.Context, shardID string, mutex *backupMutex) interface{} { 478 l.mustLoad() 479 return l.shard.commitReplication(ctx, shardID, mutex) 480 } 481 482 func (l *LazyLoadShard) abortReplication(ctx context.Context, shardID string) replica.SimpleResponse { 483 l.mustLoad() 484 return l.shard.abortReplication(ctx, shardID) 485 } 486 487 func (l *LazyLoadShard) reinit(ctx context.Context) error { 488 if err := l.Load(ctx); err != nil { 489 return err 490 } 491 return l.shard.reinit(ctx) 492 } 493 494 func (l *LazyLoadShard) filePutter(ctx context.Context, shardID string) (io.WriteCloser, error) { 495 if err := l.Load(ctx); err != nil { 496 return nil, err 497 } 498 return l.shard.filePutter(ctx, shardID) 499 } 500 501 func (l *LazyLoadShard) extendDimensionTrackerLSM(dimLength int, docID uint64) error { 502 if err := l.Load(context.Background()); err != nil { 503 return err 504 } 505 return l.shard.extendDimensionTrackerLSM(dimLength, docID) 506 } 507 508 func (l *LazyLoadShard) extendDimensionTrackerForVecLSM(dimLength int, docID uint64, vecName string) error { 509 if err := l.Load(context.Background()); err != nil { 510 return err 511 } 512 return l.shard.extendDimensionTrackerForVecLSM(dimLength, docID, vecName) 513 } 514 515 func (l *LazyLoadShard) addToPropertySetBucket(bucket *lsmkv.Bucket, docID uint64, key []byte) error { 516 l.mustLoad() 517 return l.shard.addToPropertySetBucket(bucket, docID, key) 518 } 519 520 func (l *LazyLoadShard) addToPropertyMapBucket(bucket *lsmkv.Bucket, pair lsmkv.MapPair, key []byte) error { 521 l.mustLoad() 522 return l.shard.addToPropertyMapBucket(bucket, pair, key) 523 } 524 525 func (l *LazyLoadShard) pairPropertyWithFrequency(docID uint64, freq, propLen float32) lsmkv.MapPair { 526 l.mustLoad() 527 return l.shard.pairPropertyWithFrequency(docID, freq, propLen) 528 } 529 530 func (l *LazyLoadShard) setFallbackToSearchable(fallback bool) { 531 l.mustLoad() 532 l.shard.setFallbackToSearchable(fallback) 533 } 534 535 func (l *LazyLoadShard) addJobToQueue(job job) { 536 l.mustLoad() 537 l.shard.addJobToQueue(job) 538 } 539 540 func (l *LazyLoadShard) uuidFromDocID(docID uint64) (strfmt.UUID, error) { 541 l.mustLoad() 542 return l.shard.uuidFromDocID(docID) 543 } 544 545 func (l *LazyLoadShard) batchDeleteObject(ctx context.Context, id strfmt.UUID) error { 546 if err := l.Load(ctx); err != nil { 547 return err 548 } 549 return l.shard.batchDeleteObject(ctx, id) 550 } 551 552 func (l *LazyLoadShard) putObjectLSM(object *storobj.Object, idBytes []byte) (objectInsertStatus, error) { 553 l.mustLoad() 554 return l.shard.putObjectLSM(object, idBytes) 555 } 556 557 func (l *LazyLoadShard) mutableMergeObjectLSM(merge objects.MergeDocument, idBytes []byte) (mutableMergeResult, error) { 558 l.mustLoad() 559 return l.shard.mutableMergeObjectLSM(merge, idBytes) 560 } 561 562 func (l *LazyLoadShard) deleteFromPropertySetBucket(bucket *lsmkv.Bucket, docID uint64, key []byte) error { 563 l.mustLoad() 564 return l.shard.deleteFromPropertySetBucket(bucket, docID, key) 565 } 566 567 func (l *LazyLoadShard) batchExtendInvertedIndexItemsLSMNoFrequency(b *lsmkv.Bucket, item inverted.MergeItem) error { 568 l.mustLoad() 569 return l.shard.batchExtendInvertedIndexItemsLSMNoFrequency(b, item) 570 } 571 572 func (l *LazyLoadShard) updatePropertySpecificIndices(object *storobj.Object, status objectInsertStatus) error { 573 l.mustLoad() 574 return l.shard.updatePropertySpecificIndices(object, status) 575 } 576 577 func (l *LazyLoadShard) updateVectorIndexIgnoreDelete(vector []float32, status objectInsertStatus) error { 578 l.mustLoad() 579 return l.shard.updateVectorIndexIgnoreDelete(vector, status) 580 } 581 582 func (l *LazyLoadShard) updateVectorIndexesIgnoreDelete(vectors map[string][]float32, status objectInsertStatus) error { 583 l.mustLoad() 584 return l.shard.updateVectorIndexesIgnoreDelete(vectors, status) 585 } 586 587 func (l *LazyLoadShard) hasGeoIndex() bool { 588 l.mustLoad() 589 return l.shard.hasGeoIndex() 590 } 591 592 func (l *LazyLoadShard) Metrics() *Metrics { 593 l.mustLoad() 594 return l.shard.Metrics() 595 } 596 597 func (l *LazyLoadShard) isLoaded() bool { 598 l.mutex.Lock() 599 defer l.mutex.Unlock() 600 601 return l.loaded 602 }