github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/bucket_recover_from_wal.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package lsmkv 13 14 import ( 15 "bufio" 16 "context" 17 "os" 18 "path/filepath" 19 "strings" 20 "time" 21 22 "github.com/pkg/errors" 23 "github.com/weaviate/weaviate/entities/diskio" 24 ) 25 26 func (b *Bucket) mayRecoverFromCommitLogs(ctx context.Context) error { 27 beforeAll := time.Now() 28 defer b.metrics.TrackStartupBucketRecovery(beforeAll) 29 30 // the context is only ever checked once at the beginning, as there is no 31 // point in aborting an ongoing recovery. It makes more sense to let it 32 // complete and have the next recovery (this is called once per bucket) run 33 // into this error. This way in a crashloop we'd eventually recover each 34 // bucket until there is nothing left to recover and startup could complete 35 // in time 36 if err := ctx.Err(); err != nil { 37 return errors.Wrap(err, "recover commit log") 38 } 39 40 list, err := os.ReadDir(b.dir) 41 if err != nil { 42 return err 43 } 44 45 var walFileNames []string 46 for _, fileInfo := range list { 47 if filepath.Ext(fileInfo.Name()) != ".wal" { 48 // skip, this could be disk segments, etc. 49 continue 50 } 51 52 walFileNames = append(walFileNames, fileInfo.Name()) 53 } 54 55 // recover from each log 56 for _, fname := range walFileNames { 57 path := filepath.Join(b.dir, strings.TrimSuffix(fname, ".wal")) 58 59 cl, err := newCommitLogger(path) 60 if err != nil { 61 return errors.Wrap(err, "init commit logger") 62 } 63 defer cl.close() 64 65 cl.pause() 66 defer cl.unpause() 67 68 mt, err := newMemtable(path, b.strategy, b.secondaryIndices, cl, b.metrics) 69 if err != nil { 70 return err 71 } 72 73 b.logger.WithField("action", "lsm_recover_from_active_wal"). 74 WithField("path", path). 75 Warning("active write-ahead-log found. Did weaviate crash prior to this? Trying to recover...") 76 77 meteredReader := diskio.NewMeteredReader(bufio.NewReader(cl.file), b.metrics.TrackStartupReadWALDiskIO) 78 79 err = newCommitLoggerParser(b.strategy, meteredReader, mt).Do() 80 if err != nil { 81 b.logger.WithField("action", "lsm_recover_from_active_wal_corruption"). 82 WithField("path", filepath.Join(b.dir, fname)). 83 Error(errors.Wrap(err, "write-ahead-log ended abruptly, some elements may not have been recovered")) 84 } 85 86 if err := mt.flush(); err != nil { 87 return errors.Wrap(err, "flush memtable after WAL recovery") 88 } 89 90 if mt.Size() == 0 { 91 continue 92 } 93 94 if err := b.disk.add(path + ".db"); err != nil { 95 return err 96 } 97 98 if b.strategy == StrategyReplace && b.monitorCount { 99 // having just flushed the memtable we now have the most up2date count which 100 // is a good place to update the metric 101 b.metrics.ObjectCount(b.disk.count()) 102 } 103 104 b.logger.WithField("action", "lsm_recover_from_active_wal_success"). 105 WithField("path", filepath.Join(b.dir, fname)). 106 Info("successfully recovered from write-ahead-log") 107 } 108 109 return nil 110 }